 
/* 
	Purpose: This do-file cleans up the individual ANES files (1956 through 1970), 
	         keeping the variables we'll use from each one.

	Notes: (1) Some years have weights, while others don't. 
	           Look at memo "NotesWeights" for more detail on when to use weights 
	           and which variable to use.
	       (2) All raw data names listed are the same as the download from the 
	           ANES website.

	Creates: ANES_56to70cleaner.dta
*/

* Start from an empty dataset in memory and switch off the -more- pauses so
* the do-file runs straight through without waiting for a key press.
clear 
set more off

* Every relative path used below (./RawData, ../Crosswalks, ./output) is
* resolved from this folder. The global macro Mydirectory1 is not defined
* above this line, so it has to be set before this do-file is run.
cd "$Mydirectory1/1_DataSources/ANES/"	

********************************************************************************
********************************************************************************

**************************************************************
*** 1956 
**************************************************************
/*Note: This file is the 1st year of a 3-year panel 
with some cross-sectional respondents and some panel respondents.*/

* ---- Load the raw 1956 study (file exactly as downloaded from the ANES website)
use "./RawData/NES1956.dta", clear
generate year = 1956

* ---- Retain only the raw items that are renamed or recoded further down
keep year V560002 V560007 V560088 V560095 V560096 ///
	V560120-V560129 V560130 V560131 ///
	V560171-V560182 V560173 V560177 V560178 V560183 ///
	V560188-V560191 V560203 V560295 V560298

* ---- No weight variable is available for this year, so every R gets weight 1
generate weight = 1

* ---- Father's occupation
rename V560298 fatherocc

* Mark the first observation of each distinct fatherocc value and count the
* marks, i.e. display how many distinct father-occupation codes are present.
sort fatherocc
by fatherocc: generate nvals = (_n == 1)
count if nvals == 1

* Now we rename all the other variables
* (raw 1956 item numbers -> descriptive names. Suffix R = respondent,
*  suffix HH = head of household. The same names are reused in the 1958 and
*  1960 sections of this file.)
	* Identifier (combined with year below to build id_anes) and state
	rename V560002 id
	rename V560007 state
	* Demographics
	rename V560171 sex
	rename V560172 race
	rename V560175 ageRbucket
	rename V560295 ageR
	rename V560176 ageHH
	* Occupation (occ) and industry (ind) codes. The old* versions are the
	* ones substituted further down for people coded as unemployed/retired.
	rename V560125 occHH
	rename V560128 oldoccHH
	rename V560120 occR 
	rename V560121 indR
	rename V560123 oldoccR
	rename V560124 oldindR
	rename V560126 indHH
	rename V560129 oldindHH
	* Relationship of R to the household head (relate==1 is used below as "R is head")
	rename V560174 relate
	* Education, place of birth / upbringing, income, origin
	rename V560181 educationR
	rename V560182 placeborn
	rename V560188 placegrew 
	rename V560189 raised  
	rename V560190 fam_inc_raw 
	rename V560191 famincHH
	rename V560183 origin
	* Union membership, marital status, household composition
	rename V560130 union_raw 
	rename V560177 marital
	rename V560178 R_totnumkids_0to17_livinginhh 
	rename V560173 number_adults
	* Party identification and vote items
	rename V560088 R_partyid_detailed
	rename V560095 vote48
	rename V560096 vote52
	rename V560203 vote56

/*
	Note: Construct unique id. Per ANES documentation 
	(https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
	this is done by combining year + id variable. 
*/
* Step 1: string copies of both pieces (e.g. year 1956 -> "1956", id 12 -> "12")
tostring year id, gen(stryear strid_temp)
* Step 2: glue them together with no separator and no zero padding ("195612")
egen id_temp = concat(stryear strid_temp)
* Step 3: back to a numeric variable. id_anes is checked for duplicates and
* used as the sort key before this year is saved; the helper variables
* (stryear, strid_temp, id_temp) are dropped at that point.
destring id_temp, gen(id_anes)

* Age: codes 98 and 99 become missing, then build the squared term
replace ageR = . if inlist(ageR, 98, 99)
generate agesq = ageR * ageR

* Head-of-household dummy (1 when relate is 1, otherwise 0)
generate head = (relate == 1)

* Race--categories are white, Black, other. Only 1 = white and 2 = Black are
* kept; every value above 2 is set to missing.
replace race = . if race > 2
tabulate race, missing

* Number of children (0-17) living in the hh: 99 becomes missing
* note: no topcoded values
replace R_totnumkids_0to17_livinginhh = . if R_totnumkids_0to17_livinginhh == 99

* Number of adults in the hh: 9 becomes missing
replace number_adults = . if number_adults == 9

* Flag for an indeterminate (6+) number of adults; missing when number_adults is missing
generate flag_6plus_adultsinhh = cond(missing(number_adults), ., number_adults == 6)
tabulate flag_6plus_adultsinhh, missing
label variable flag_6plus_adultsinhh "Dummy =1 if there's an indeterminate (6+) # of adults in R's hh"

* Household size = children + adults, defined only when both pieces are known
generate R_hhsize_plusR = number_adults + R_totnumkids_0to17_livinginhh ///
	if !missing(R_totnumkids_0to17_livinginhh, number_adults)
label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
tabulate R_hhsize_plusR, missing

* Same count without the respondent
generate R_hhsize_minusR = R_hhsize_plusR - 1
label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
tabulate R_hhsize_minusR, missing

* Hispanic dummy: 1 only for placeborn code 182 (Puerto Rico), 0 for everyone else
generate hispanic = (placeborn == 182)

* Spouse dummy: relate code 2 combined with sex code 2
generate wifeR = (sex == 2 & relate == 2)

* Marital-status dummies, one per marital code, each followed by a tabulation
local statuses married widowed divorced separated never_married
local codes    1       5       3        4         2
forvalues i = 1/5 {
	local status : word `i' of `statuses'
	local code   : word `i' of `codes'
	generate `status' = (marital == `code')
	tabulate `status', missing
}

* Union dummies. All four are left missing when union_raw is 8 or above
* (including missing); within codes below 8 they are 0/1.
tabulate union_raw

* Anyone in the household in a union (any code above 0)
generate union_hh = (union_raw > 0) if union_raw < 8

* Respondent in a union (codes 1, 3, 5, 7)
generate unionR = inlist(union_raw, 1, 3, 5, 7) if union_raw < 8

* Anyone besides the respondent in a union (codes 2 through 7)
generate union_other = inrange(union_raw, 2, 7) if union_raw < 8

* Respondent and someone else in the household (codes 3, 5, 7)
generate union_Rplus = inlist(union_raw, 3, 5, 7) if union_raw < 8

* Employment variables for R and HH
* Every dummy is derived from the occupation code and is left missing when
* that code is 99 or missing (the "if occ<99" qualifier).
	tab occR, m
	gen employed= occR>=1 & occR<=82 if occR<99
	gen retired = occR==95 if occR<99
	gen laborforce = employed==1 | occR==91 | occR==94 if occR<99 
	gen hwstd = occR==92 | occR==96 if occR<99

	* Household head. When R is the head, occHH is coded 98, so R's own
	* status is copied over by the replace statements.
	tab occHH
	gen employedHH = occHH>=1 & occHH<=82 if occHH<99
	replace employedHH = 1 if employed==1 & occHH==98 & relate==1 //occHH labeled as 98 when respondent is head
	
	gen retiredHH = occHH==95 if occHH<99
	replace retiredHH = 1 if retired==1 & occHH==98 & relate==1 
	
	* In the labor force = working (codes 1-82) or coded 91/94, the same rule
	* used for R above and for the head in the 1958 and 1960 sections.
	* The previous rule (occHH 1-94) also counted codes 83-90, 92 and 93, so
	* heads flagged by hwstdHH (code 92) were treated as being in the labor force.
	gen laborforceHH = (occHH>=1 & occHH<=82) | occHH==91 | occHH==94 if occHH<99
	replace laborforceHH = 1 if laborforce==1 & occHH==98 & relate==1 
	
	gen hwstdHH = occHH==92 | occHH==96 if occHH<99
	replace hwstdHH = 1 if hwstd==1 & occHH==98 & relate==1 

** Clean up HH occ variable
* When R is the head (occHH coded 98) use R's own occupation; for codes
* 91/94/95 fall back on the occupation held before losing the job or retiring.
replace occHH = occR if occHH == 98 & relate == 1
replace occHH = oldoccHH if inlist(occHH, 91, 94, 95)

** Clean up indR variable for unemployed (only when the old industry code is below 96)
replace indR = oldindR if inlist(occR, 91, 94) & oldindR < 96

** Clean up occR variable for unemployed
* (must come after the two statements above, which still read the raw occR)
replace occR = oldoccR if inlist(occR, 91, 94)

** Coarsened variables (following Gallup numbering)
* Codes outside the listed ranges leave coarse_* missing.
foreach occvar of varlist occR occHH {
	generate coarse_`occvar' = .
	replace  coarse_`occvar' = 1 if inrange(`occvar', 1, 19)    // professional
	replace  coarse_`occvar' = 2 if inrange(`occvar', 21, 29)   // managers
	replace  coarse_`occvar' = 3 if inrange(`occvar', 30, 32)   // clerical
	replace  coarse_`occvar' = 8 if inrange(`occvar', 33, 39)   // sales
	replace  coarse_`occvar' = 4 if inrange(`occvar', 41, 49)   // skilled
	replace  coarse_`occvar' = 5 if `occvar' == 51              // semi-skilled
	replace  coarse_`occvar' = 7 if inrange(`occvar', 61, 69)   // service
	replace  coarse_`occvar' = 0 if `occvar' == 71 | inrange(`occvar', 81, 89) // farm laborer and operators
	replace  coarse_`occvar' = 6 if inrange(`occvar', 78, 79)   // non-farm laborers
}

* Coarsen father occ
* Attach the crosswalk's coarse code (fatheroccej) to each respondent,
* matching on the raw fatherocc code.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1956_ANES.dta"
	* Stop the do-file if any non-missing fatherocc code has no crosswalk row
	assert fatherocc==. if _merge==1
	* Remove crosswalk codes that no respondent reports
	drop if _merge==2
	drop _merge

* Coarsen household head occ and respondent
* The same crosswalk is reused: in a temporary copy its key is renamed to
* occHH (then occR) and its coarse code to occHH_ej (occR_ej), so the merge
* adds those coarse codes to the survey data.
foreach x in HH R {
	* preserve/restore sets the survey data aside while the crosswalk copy is built
	preserve
	use "../Crosswalks/Crosswalk_1956_ANES.dta", clear
	rename fatherocc occ`x'
	rename fatheroccej occ`x'_ej

	tempfile cross1
	save `cross1'
	restore 

	* Codes 98 and 19 are blanked before matching; unmatched rows must then
	* have a missing key or the assert below fails.
	* NOTE(review): presumably 98/19 are absent from the crosswalk -- confirm.
	replace occ`x'=. if occ`x'==98 | occ`x'==19
	merge m:1 occ`x' using `cross1'
	assert occ`x'==. if _merge==1
	drop if _merge==2
	drop _merge
	}


/* The data do not allow dummies for whether the hh head was dad, mom, other
   male, or other female when R was growing up. What can be built is
   headofhh_father_imputed: every obs with a non-missing coarse father
   occupation is assumed to have had the father as head of hh when growing up.
   The variable is 1 for those obs and missing otherwise.
*/
generate headofhh_father_imputed = cond(fatheroccej != ., 1, .)

** DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING)
* 1 = raw father code 91-95 (unemployed, student, other general unemployed, retired)
* 0 = otherwise, when the coarse father code lies in 1-81
* . = everything else
generate father_notworking = cond(inrange(fatherocc, 91, 95), 1, ///
	cond(inrange(fatheroccej, 1, 81), 0, .))
tabulate father_notworking, missing

* mother_notworking--not available.

* Note: unable to construct yrsschool var bc edu does not go from 0-18+

** Construct education variable that is consistent over time
* 0 no schooling, 1 less than grade school, 2 completed grade school,
* 3 some high school, 4 HS degree, 5 some college, 6 college degree.
* Raw codes 0-3 carry over unchanged; any raw code not listed stays missing.
generate eduR = .
replace  eduR = educationR if inlist(educationR, 0, 1, 2, 3)
replace  eduR = 4 if inlist(educationR, 4, 5)
replace  eduR = 5 if inlist(educationR, 6, 7)
replace  eduR = 6 if educationR == 8
tabulate eduR, missing


** Consistent income variable over time using midpoints
* Each raw bracket code is replaced by the dollar amount paired with it below.
tabulate fam_inc_raw
local raw_codes 10  11   12   13   20   21   22   30   31
local dollars   750 1500 2500 3500 4500 5500 6750 8750 12500
generate fam_inc = .
forvalues k = 1/9 {
	local code   : word `k' of `raw_codes'
	local amount : word `k' of `dollars'
	replace fam_inc = `amount' if fam_inc_raw == `code'
}

* note: the suffix "_son" is used to match the variable names in other datasets.
* All respondents (i.e., male and female) are given a value for these variables.
generate bottomcoded_son = (fam_inc == 750)   if !missing(fam_inc)
generate topcoded_son    = (fam_inc == 12500) if !missing(fam_inc)

** Variable for father foreign born
* missing for origin codes 49 and 99; 1 for codes 10-20, 31 and 42; 0 otherwise
generate fatherforeign = cond(inlist(origin, 49, 99), ., ///
	inrange(origin, 10, 20) | inlist(origin, 31, 42))

* Variable for respondent being foreign born (placeborn above 200);
* left missing when placeborn is 999 or above
generate foreignborn = (placeborn > 200) if placeborn < 999

* Want state where R grew up
* Codes above 200 (grew up in a different country) and the codes listed below
* (only a region, not a state, is available, or Puerto Rico = 182) are left
* missing. Subtracting 100 makes the remaining codes match the state codes.
generate stategrow = placegrew - 100 if placegrew <= 200 ///
	& !inlist(placegrew, 109, 118, 119, 129, 138, 139, 150, ///
	                     159, 169, 178, 179, 182, 198, 199)
label variable stategrow "State where R grew up"

* Variable for being in public-sector industry
* missing when indR is 96 (unclassifiable) or 99 (missing); 1 when indR is 40
* (includes teachers, protective service officers); 0 otherwise
generate publicind = cond(inlist(indR, 96, 99), ., indR == 40)

* Variable for where respondent raised
* 1 farm (raised 0), 2 small town (1), 3 small city (2-4, 7), 4 large city (5-6, 8)
generate cityfarm = .
replace  cityfarm = 1 if raised == 0
replace  cityfarm = 2 if raised == 1
replace  cityfarm = 3 if inrange(raised, 2, 4) | raised == 7
replace  cityfarm = 4 if inrange(raised, 5, 6) | raised == 8

* Three-way version: 1 farm, 2 small town, 3 any city (raised 2-8)
generate cityfarm2 = cond(raised == 0, 1, ///
	cond(raised == 1, 2, ///
	cond(inrange(raised, 2, 8), 3, .)))

* Two-way version: 0 for farm or small town, 1 for any city (raised 2-8)
generate urban = cond(inlist(raised, 0, 1), 0, ///
	cond(inrange(raised, 2, 8), 1, .))

* Display how often each id_anes value occurs (report only; nothing is dropped)
duplicates report id_anes //no duplicates
* Remove the helper variables used to build id_anes and the raw id itself
* (the wildcard matches both strid_temp and id_temp)
drop stryear *id_temp id
sort id_anes

* Keep the cleaned 1956 data in a temporary file for use later in this do-file
tempfile ANES56 
save `ANES56' 

********************************************************************************
********************************************************************************

**************************************************************
*** 1958
**************************************************************
/*Note: This file is the 2nd year of a 3-year panel 
with some cross-sectional respondents and some panel respondents.*/

* Load the raw 1958 study and tag every observation with the study year
use "./RawData/NES1958.dta", clear //download data from ANES website 
gen year=1958

* Keep only the raw items that are renamed, recoded or used for the sample
* restriction below
keep year V580002 V580003 V580007 V580008 V580009 V580170-V580194 V580202-V580204 V580177 V580172 V580062 ///
V580220 V580226 V580228 V580257 V580221 V580205 V580176 V580222 V580223 V580224 V580225 V580069 V580071 V580074

** Father occupation
rename V580257 fatherocc 

* nvals marks the first observation of each distinct fatherocc value; the
* count therefore shows how many distinct father-occupation codes are present
bysort fatherocc: gen nvals = _n ==1
count if nvals 

* Unlike 1956 (where weight is set to 1), this study supplies a weight variable
rename V580003 weight

** Rename the other variables
* (raw 1958 item numbers -> the same descriptive names used in the 1956
*  section. Suffix R = respondent, suffix HH = head of household.)
	* Identifier (combined with year below to build id_anes) and state
	rename V580002 id
	rename V580009 state
	* Demographics, education, income, union membership
	rename V580170 sex
	rename V580171 race
	rename V580174 ageR
	rename V580180 educationR
	rename V580203 fam_inc_raw 
	rename V580205 union_raw 
	* Occupation (occ) and industry (ind) codes. The old* versions are the
	* ones substituted further down for people coded as unemployed/retired.
	rename V580181 occR
	rename V580184 oldoccR
	rename V580182 indR
	rename V580185 oldindR
	rename V580189 indHH
	rename V580192 oldindHH
	rename V580188 occHH
	rename V580191 oldoccHH
	* Relationship of R to the household head (relate==1 is used below as "R is head")
	rename V580173 relate
	* Place of birth / upbringing and origin
	rename V580220 placeborn
	rename V580226 placegrew 
	rename V580221 origin
	rename V580228 raised 
	* Marital status and household composition
	rename V580176 marital
	rename V580177 R_totnumkids_0to17_livinginhh 
	rename V580172 number_adults
	* Party identification (R and parents) and vote item
	rename V580062 R_partyid_detailed
	rename V580069 Rfather_partyid
	rename V580071 Rmother_partyid
	rename V580074 vote56

/*
    Note: Construct unique id. Per ANES documentation 
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
    this is done by combining year + id variable. 
*/
* Step 1: string copies of both pieces (e.g. year 1958 -> "1958", id 12 -> "12")
tostring year id, gen(stryear strid_temp)
* Step 2: glue them together with no separator and no zero padding ("195812")
egen id_temp = concat(stryear strid_temp)
* Step 3: back to numeric. id_anes is the key used below to merge in
* ./output/keepPRs_1958.dta, so its format must match the one in that file.
destring id_temp, gen(id_anes)

//-----------------------------------------------------------------------------//
//-----------------------------------------------------------------------------//

*************************************************
* RESTRICT SAMPLE TO CROSS-SECTION RESPONDENTS *
*************************************************

*Restrict based on age
* ageR1956 = R's age two years earlier, i.e. at the time of the 1956 study
gen ageR1956 = ageR - 2
tab ageR1956, m

/*note: This cross section includes both new respondents and panel 
        respondents from 1956. Will only keep the new respondents 
        (i.e. non-reinterviews). Drops about 71% of 1958 sample.
*/

/*note: Keep only if (1) R is a cross section respondent (V580008==1), or 
        (2) R was 28 or 29 years old in 1956 study (would have been dropped 
        from our sample in 1956 bc outside 30-50 age range), or 
        (3) R is flagged keepPR_1958==1 in ./output/keepPRs_1958.dta, 
        which is merged in below.
*/
* tag1958 = 1 for respondents who satisfy neither (1) nor (2); the count
* shows how many there are
gen tag1958 = (V580008~=1 & (ageR1956~=28 & ageR1956~=29))
count if V580008~=1 & (ageR1956~=28 & ageR1956~=29) 

/* Bring in variable that tags panel 
   respondents that should be kept in 1958 */
sort id_anes
merge 1:1 id_anes using ./output/keepPRs_1958.dta

* Rows that exist only in the keepPRs file are discarded
drop if _merge==2 //all unmatched are bc year is 1956 or 1960
drop _merge

* Number of respondents who satisfy none of (1)-(3), i.e. who are dropped next
count if V580008~=1 & (ageR1956~=28 & ageR1956~=29) & keepPR_1958~=1 

//implement restrictions
keep if V580008==1 | (ageR1956==28 | ageR1956==29) | keepPR_1958==1

//-----------------------------------------------------------------------------//
//-----------------------------------------------------------------------------//

* Head-of-household dummy; left missing when relate is 9 or above
generate head = (relate == 1) if relate < 9

* Age squared
* NOTE(review): unlike the 1956 and 1960 sections, ageR is not recoded to
* missing for any code (e.g. 98/99) before it is used here -- confirm that the
* 1958 age item has no such codes.
generate agesq = ageR * ageR

* Race--categories are white, Black, other. Only 1 = white and 2 = Black are
* kept; every value above 2 is set to missing.
replace race = . if race > 2
tabulate race, missing

* Children variable--no cleaning necessary.

* Number of adults in the hh: 9 becomes missing
replace number_adults = . if number_adults == 9

* Household size = children + adults, defined only when number_adults is known
generate R_hhsize_plusR = number_adults + R_totnumkids_0to17_livinginhh ///
	if !missing(number_adults)
label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
tabulate R_hhsize_plusR, missing

* Same count without the respondent
generate R_hhsize_minusR = R_hhsize_plusR - 1
label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
tabulate R_hhsize_minusR, missing

* Hispanic dummy: 1 for placeborn codes 203 (mexico) and 182 (Puerto Rico), else 0
generate hispanic = inlist(placeborn, 182, 203)

* Spouse dummy: relate code 2 combined with sex code 2; missing when relate is 9 or above
generate wifeR = (sex == 2 & relate == 2) if relate < 9

* Marital-status dummies, one per marital code, each followed by a tabulation.
* All are left missing when marital is 9 or above.
local statuses married widowed divorced separated never_married
local codes    1       5       3        4         2
forvalues i = 1/5 {
	local status : word `i' of `statuses'
	local code   : word `i' of `codes'
	generate `status' = (marital == `code') if marital < 9
	tabulate `status', missing
}

* Union dummies. All four are left missing when union_raw is 8 or above
* (including missing); within codes below 8 they are 0/1.
tabulate union_raw

* Anyone in the household in a union (any code above 0)
generate union_hh = (union_raw > 0) if union_raw < 8

* Respondent in a union (codes 1, 3, 5)
generate unionR = inlist(union_raw, 1, 3, 5) if union_raw < 8

* Anyone besides the respondent in a union (codes 2 through 6)
generate union_other = inrange(union_raw, 2, 6) if union_raw < 8

* Respondent and someone else in the household (codes 3, 5)
generate union_Rplus = inlist(union_raw, 3, 5) if union_raw < 8

* Employment variables for R and HH
* The same four dummies are built from occR (no suffix) and then from occHH
* (suffix HH). Each is left missing when the occupation code is 99 or missing.
foreach who in R HH {
	local sfx = cond("`who'" == "HH", "HH", "")
	tabulate occ`who'
	generate employed`sfx'   = inrange(occ`who', 1, 89) if occ`who' < 99
	generate retired`sfx'    = (occ`who' == 95) if occ`who' < 99
	generate laborforce`sfx' = (employed`sfx' == 1 | inlist(occ`who', 91, 94)) if occ`who' < 99
	generate hwstd`sfx'      = inlist(occ`who', 92, 96) if occ`who' < 99
}

* Clean up occHH variable (get occupation before they lost job, retired, or went on strike)
replace occHH = oldoccHH if inlist(occHH, 91, 93, 94, 95)

* Clean up indR variable for unemployed (only when the old industry code is below 96)
replace indR = oldindR if inlist(occR, 91, 93, 94) & oldindR < 96

* Clean up occR variable for unemployed
* (must come after the indR statement above, which still reads the raw occR)
replace occR = oldoccR if inlist(occR, 91, 93, 94)

** Coarsened variables (following Gallup numbering)
* Codes outside the listed ranges leave coarse_* missing.
foreach occvar of varlist occR occHH {
	generate coarse_`occvar' = .
	replace  coarse_`occvar' = 1 if inrange(`occvar', 1, 19)    // professional
	replace  coarse_`occvar' = 2 if inrange(`occvar', 21, 29)   // managers
	replace  coarse_`occvar' = 3 if inrange(`occvar', 30, 32)   // clerical
	replace  coarse_`occvar' = 8 if inrange(`occvar', 33, 39)   // sales
	replace  coarse_`occvar' = 4 if inrange(`occvar', 41, 49)   // skilled
	replace  coarse_`occvar' = 5 if `occvar' == 51              // semi-skilled
	replace  coarse_`occvar' = 7 if inrange(`occvar', 61, 69)   // service
	replace  coarse_`occvar' = 0 if `occvar' == 71 | inrange(`occvar', 81, 89) // farm laborer and operators
	replace  coarse_`occvar' = 6 if inrange(`occvar', 78, 79)   // non-farm laborers
}

* Coarsen father occ
* Attach the crosswalk's coarse code (fatheroccej) to each respondent,
* matching on the raw fatherocc code.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1958_ANES.dta"
	* Stop the do-file if any non-missing fatherocc code has no crosswalk row
	assert fatherocc==. if _merge==1
	* Remove crosswalk codes that no respondent reports
	drop if _merge==2
	drop _merge

* Coarsen household head occ and respondent
* The same crosswalk is reused: in a temporary copy its key is renamed to
* occHH (then occR) and its coarse code to occHH_ej (occR_ej), so the merge
* adds those coarse codes to the survey data.
foreach x in HH R {
	* preserve/restore sets the survey data aside while the crosswalk copy is built
	preserve
	use "../Crosswalks/Crosswalk_1958_ANES.dta", clear
	rename fatherocc occ`x'
	rename fatheroccej occ`x'_ej

	tempfile cross1
	save `cross1'
	restore 

	* Codes 98 and 19 are blanked before matching; unmatched rows must then
	* have a missing key or the assert below fails.
	* NOTE(review): presumably 98/19 are absent from the crosswalk -- confirm.
	replace occ`x'=. if occ`x'==98 | occ`x'==19
	merge m:1 occ`x' using `cross1'
	assert occ`x'==. if _merge==1
	drop if _merge==2
	drop _merge
	}


/* The data do not allow dummies for whether the hh head was dad, mom, other
   male, or other female when R was growing up. What can be built is
   headofhh_father_imputed: every obs with a non-missing coarse father
   occupation is assumed to have had the father as head of hh when growing up.
   The variable is 1 for those obs and missing otherwise.
*/
generate headofhh_father_imputed = cond(fatheroccej != ., 1, .)

** DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING)
* 1 = raw father code 91-95 (unemployed, student, other general unemployed, retired)
* 0 = otherwise, when the coarse father code lies in 1-81
* . = everything else
generate father_notworking = cond(inrange(fatherocc, 91, 95), 1, ///
	cond(inrange(fatheroccej, 1, 81), 0, .))
tabulate father_notworking, missing

* mother_notworking--not available.

* Note: unable to construct yrsschool var bc edu does not go from 0-18+

** Construct education variable that is comparable over time
* 0 no schooling, 1 less than grade school, 2 completed grade school,
* 3 some high school, 4 HS degree, 5 some college, 6 college degree.
* Raw codes 0-3 carry over unchanged; any raw code not listed stays missing.
generate eduR = .
replace  eduR = educationR if inlist(educationR, 0, 1, 2, 3)
replace  eduR = 4 if inlist(educationR, 4, 5)
replace  eduR = 5 if inlist(educationR, 6, 7)
replace  eduR = 6 if educationR == 8
tabulate eduR, missing

** Consistent income variable over time using midpoints
* Each raw bracket code is replaced by the dollar amount paired with it below.
local raw_codes 10  11   12   13   20   21   22   30   31    32
local dollars   750 1500 2500 3500 4500 5500 6750 8750 12500 18750
generate fam_inc = .
forvalues k = 1/10 {
	local code   : word `k' of `raw_codes'
	local amount : word `k' of `dollars'
	replace fam_inc = `amount' if fam_inc_raw == `code'
}

* note: the suffix "_son" is used to match the variable names in other datasets.
* All respondents (i.e., male and female) are given a value for these variables.
* The top bracket amount is 1.25*15000 = 18750.
generate bottomcoded_son = (fam_inc == 750)   if !missing(fam_inc)
generate topcoded_son    = (fam_inc == 18750) if !missing(fam_inc)

** Variable for father foreign born
* missing for origin codes 49 and 99; 1 for codes 10-20, 31 and 91; 0 otherwise
generate fatherforeign = cond(inlist(origin, 49, 99), ., ///
	inrange(origin, 10, 20) | inlist(origin, 31, 91))

* Variable for respondent being foreign born (placeborn above 200);
* left missing when placeborn is 999 or above
generate foreignborn = (placeborn > 200) if placeborn < 999

* Want state where R grew up
* Codes above 200 (grew up in a different country) and the codes listed below
* (only a region, not a state, is available, or Puerto Rico = 182) are left
* missing. Subtracting 100 makes the remaining codes match the state codes.
generate stategrow = placegrew - 100 if placegrew <= 200 ///
	& !inlist(placegrew, 109, 119, 129, 139, 150, 159, ///
	                     169, 179, 182, 198, 199)
label variable stategrow "State where R grew up"

* Variable for being in public-sector industry 
* Starts at 0, is set to missing for indR codes 91 and 99, and is then set to
* 1 for teachers (occR==3) and govt employees (indR==40). The last statement
* runs after the missing recode, so occR==3 yields 1 even when indR is 91/99.
gen publicind=0
replace publicind=. if indR==91 | indR==99
replace publicind=1 if occR==3 | indR==40 //teachers and govt employees 

* Relate
replace relate=. if relate==9

* Variable for where respondent raised
* In this section raised is coded 1 = farm, 2 = small town, 3-10 = city or suburb.
gen cityfarm=.
replace cityfarm=1 if raised==1 //farm
replace cityfarm=2 if raised==2 //small town
replace cityfarm=3 if (raised>=3 & raised<=5) | raised==8 //small city
replace cityfarm=4 if (raised>=6 & raised<=7) | raised==9 | raised==10 //large city or suburb

* cityfarm2 and urban follow the same coding of raised as cityfarm above.
* They previously used 0 = farm and 1 = small town (the 1956 coding), which
* classified farm respondents as small town and small-town respondents as city.
gen cityfarm2=.
replace cityfarm2=1 if raised==1 //farm
replace cityfarm2=2 if raised==2 //small town
replace cityfarm2=3 if raised>=3 & raised<=10 //city or suburb

gen urban=.
replace urban=0 if raised==1 | raised==2 //farm or small town
replace urban=1 if raised>=3 & raised<=10 //city or suburb

* Display how often each id_anes value occurs (report only; nothing is dropped)
duplicates report id_anes //no duplicates
* Remove the helper variables used to build id_anes, the raw id, and the
* age-in-1956 helper (the wildcard matches both strid_temp and id_temp)
drop stryear *id_temp id ageR1956
sort id_anes
* Move the key variables to the front of the dataset
order id_anes year weight

* Keep the cleaned 1958 data in a temporary file for use later in this do-file
tempfile ANES58 
save `ANES58' 

********************************************************************************
********************************************************************************

**************************************************************
*** 1960 
**************************************************************
/*Note: This file is the final year of a 3-year panel 
with some cross-sectional respondents and some panel respondents.*/

* Load the raw 1960 study and tag every observation with the study year
use "./RawData/NES1960.dta", clear //download data from ANES website 
gen year=1960

* Keep only the raw items that are renamed, recoded or used for the sample
* restriction below
keep year V600002 V600003 V600008 V600010 V600015 V600118-V600142 V600180 V600182 V600125 V600091 ///
V600175 V600189 V600176 V600177 V600150 V600184 V600124 V600178 V600120 V600097 V600201  V600062 V600070

** Father occupation
rename V600180 fatherocc

* nvals marks the first observation of each distinct fatherocc value; the
* count therefore shows how many distinct father-occupation codes are present
bysort fatherocc: gen nvals = _n ==1
count if nvals 

* Unlike 1956 (where weight is set to 1), this study supplies a weight variable
rename V600003 weight

** Rename other variables
* (raw 1960 item numbers -> the same descriptive names used in the 1956 and
*  1958 sections. Suffix R = respondent, suffix HH = head of household.)
	* Identifier (combined with year below to build id_anes) and state
	rename V600002 id
	rename V600010 state
	* Demographics
	rename V600118 sex
	rename V600119 race
	rename V600122 ageR
	* Occupation (occ) and industry (ind) codes. The old* versions are the
	* ones substituted further down for people coded as unemployed/retired.
	rename V600129 occR
	rename V600132 oldoccR
	rename V600136 occHH
	rename V600139 oldoccHH
	rename V600130 indR
	rename V600133 oldindR
	rename V600137 indHH
	rename V600140 oldindHH
	* Relationship of R to the household head (relate==1 is used below as "R is head")
	rename V600121 relate
	* Education, place of birth, income, origin of R and of R's parents
	rename V600128 educationR
	rename V600175 placeborn
	rename V600189 fam_inc_raw 
	rename V600176 origin
	rename V600177 fatherorigin 
	rename V600178 motherorigin
	* Union membership, upbringing, marital status, household composition
	rename V600150 union_raw
	rename V600182 placegrew
	rename V600184 raised 
	rename V600124 marital
	rename V600125 R_totnumkids_0to17_livinginhh 
	rename V600120 number_adults
	* Party identification, vote items and two govtact_* attitude items
	rename V600091 R_partyid_detailed
	rename V600097 vote56
	rename V600201 vote60
	rename V600062 govtact_ftblacks
	rename V600070 govtact_integschls

/*
    Note: Construct unique id. Per ANES documentation 
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
    this is done by combining year + id variable. 
*/
* Step 1: string copies of both pieces (e.g. year 1960 -> "1960", id 12 -> "12")
tostring year id, gen(stryear strid_temp)
* Step 2: glue them together with no separator and no zero padding ("196012")
egen id_temp = concat(stryear strid_temp)
* Step 3: back to numeric. id_anes is the key used below to merge in
* ./output/keepPRs_1960.dta, so its format must match the one in that file.
destring id_temp, gen(id_anes)

//-----------------------------------------------------------------------------//
//-----------------------------------------------------------------------------//

// RESTRICT SAMPLE TO CROSS-SECTION RESPONDENTS

* R's age at the time of the two earlier studies
generate ageR1956 = ageR - 4
tabulate ageR1956, missing

generate ageR1958 = ageR - 2
tabulate ageR1958, missing

/*note: This cross section includes both new respondents and panel
        respondents from 1956 and/or 1958. Only the new respondents
        (i.e. non-reinterviews) are kept. Drops about 82% of the
        1960 sample.
*/

/*note: An R is kept only if
        1) R is a cross section respondent in 1960 (V600015==9), or
        2) R was 28 or 29 years old in 1958 and only appears in the panel
           in 1958 + 1960 (V600015==6); dropped from our 1958 sample bc
           outside the 30-50 age range, or
        3) R was 26 or 27 years old in the 1956 study (V600015 in 1-5);
           dropped from our 1956 and 1958 samples bc outside the 30-50
           age range, or
        4) R only appears in the panel in 1956 and 1960 and was 28 or 29
           in 1956 (V600015 is 2 or 5), or
        5) R is flagged keepPR_1960==1 in ./output/keepPRs_1960.dta.
        For the Rs under 2)-4), the 1960 study was the first time that
        they entered our sample of Rs aged 30-50.
*/

* a = 1 when any of rules 1)-4) holds, 0 otherwise
generate a = V600015 == 9 ///
	| (inlist(ageR1958, 28, 29) & V600015 == 6) ///
	| (inlist(ageR1956, 26, 27) & inrange(V600015, 1, 5)) ///
	| (inlist(ageR1956, 28, 29) & inlist(V600015, 2, 5))
tabulate a, missing

* tag1960 = 1 for the respondents that rules 1)-4) do not cover
generate tag1960 = (a == 0)
tabulate tag1960, missing

/* Bring in the variable that tags panel
   respondents that should be kept in 1960 */
sort id_anes
merge 1:1 id_anes using ./output/keepPRs_1960.dta

* Rows found only in the keepPRs file: all unmatched are bc year is 1956 or 1958
drop if _merge == 2
drop _merge

tabulate tag1960 if keepPR_1960 == 1, missing

*Implement restrictions: rules 1)-4) are already summarised in a, rule 5) is the flag
keep if a == 1 | keepPR_1960 == 1

//-----------------------------------------------------------------------------//
//-----------------------------------------------------------------------------//

* Age: code 99 becomes missing, then build the squared term
replace ageR = . if ageR == 99
generate agesq = ageR * ageR

* Head-of-household dummy; left missing when relate is 9 or above
generate head = (relate == 1) if relate < 9

* Race--categories are white, Black, other. Only 1 = white and 2 = Black are
* kept; every value above 2 is set to missing.
replace race = . if race > 2
tabulate race, missing

* Children variable(s)
* Flag code 96, an indeterminate (>9) number of kids in the hh; the flag is
* missing when the raw code is 99 (= na)
generate flag_morethan9kids_inhh = cond(R_totnumkids_0to17_livinginhh == 99, ., ///
	R_totnumkids_0to17_livinginhh == 96)
label variable flag_morethan9kids_inhh "Dummy =1 if R has indeterminate (>9) # of kids in hh"
tabulate flag_morethan9kids_inhh, missing

* 99 becomes missing, the remaining codes are divided by 10, and the flagged
* (>9 kids) cases are topcoded as 10
replace R_totnumkids_0to17_livinginhh = . if R_totnumkids_0to17_livinginhh == 99
replace R_totnumkids_0to17_livinginhh = R_totnumkids_0to17_livinginhh / 10
replace R_totnumkids_0to17_livinginhh = 10 if flag_morethan9kids_inhh == 1
tabulate R_totnumkids_0to17_livinginhh, missing
label define V600125_ 10 "10", modify

* Number of adults in the hh: 9 becomes missing
replace number_adults = . if number_adults == 9

* Flag for an indeterminate (6+) number of adults; missing when number_adults is missing
generate flag_6plus_adultsinhh = cond(missing(number_adults), ., number_adults == 6)
label variable flag_6plus_adultsinhh "Dummy =1 if there's an indeterminate (6+) # of adults in R's hh"
tabulate flag_6plus_adultsinhh, missing

* Household size = children + adults, defined only when both pieces are known
generate R_hhsize_plusR = number_adults + R_totnumkids_0to17_livinginhh ///
	if !missing(R_totnumkids_0to17_livinginhh, number_adults)
label variable R_hhsize_plusR "total # of persons in R's hh (including R)"
tabulate R_hhsize_plusR, missing

* Same count without the respondent
generate R_hhsize_minusR = R_hhsize_plusR - 1
label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
tabulate R_hhsize_minusR, missing

* Hispanic dummy: 1 when placeborn is 182 or 219, or when either parent's
* origin code is 219; 0 for everyone else
generate hispanic = (inlist(placeborn, 182, 219) | fatherorigin == 219 | motherorigin == 219)

* Spouse dummy: relate code 2 combined with sex code 2; missing when relate is 9 or above
generate wifeR = (sex == 2 & relate == 2) if relate < 9

* Marital-status dummies, one per marital code, each followed by a tabulation.
* All are left missing when marital is 9 or above.
local statuses married widowed divorced separated never_married
local codes    1       5       3        4         2
forvalues i = 1/5 {
	local status : word `i' of `statuses'
	local code   : word `i' of `codes'
	generate `status' = (marital == `code') if marital < 9
	tabulate `status', missing
}

* Union dummies. All four are left missing when union_raw is 8 or above
* (including missing); within codes below 8 they are 0/1.
tabulate union_raw

* Anyone in the household in a union (any code above 0)
generate union_hh = (union_raw > 0) if union_raw < 8

* Respondent in a union (codes 1, 3, 5)
generate unionR = inlist(union_raw, 1, 3, 5) if union_raw < 8

* Anyone besides the respondent in a union (codes 2 through 6)
generate union_other = inrange(union_raw, 2, 6) if union_raw < 8

* Respondent and someone else in the household (codes 3, 5)
generate union_Rplus = inlist(union_raw, 3, 5) if union_raw < 8

* Employment variables
* The same four dummies are built from occR (no suffix) and then from occHH
* (suffix HH). Each is left missing when the occupation code is 99 or missing.
foreach who in R HH {
	local sfx = cond("`who'" == "HH", "HH", "")
	tabulate occ`who'
	generate employed`sfx'   = inrange(occ`who', 1, 89) if occ`who' < 99
	generate retired`sfx'    = (occ`who' == 95) if occ`who' < 99
	generate laborforce`sfx' = (employed`sfx' == 1 | inlist(occ`who', 91, 94)) if occ`who' < 99
	generate hwstd`sfx'      = inlist(occ`who', 92, 96) if occ`who' < 99
}

* Clean up occHH variable (get occupation before they retired or lost job)
replace occHH = oldoccHH if inlist(occHH, 94, 95)

* Clean up indR variable for unemployed: only when indR is 91 and the old
* industry code is below 91
replace indR = oldindR if occR == 94 & indR == 91 & oldindR < 91

* Clean up occR variable for unemployed
* (must come after the indR statement above, which still reads the raw occR)
replace occR = oldoccR if occR == 94

** Coarsened variables (following Gallup numbering)
* Codes outside the listed ranges leave coarse_* missing.
foreach occvar of varlist occR occHH {
	generate coarse_`occvar' = .
	replace  coarse_`occvar' = 1 if inrange(`occvar', 1, 19)    // professional
	replace  coarse_`occvar' = 2 if inrange(`occvar', 21, 29)   // managers
	replace  coarse_`occvar' = 3 if inrange(`occvar', 30, 32)   // clerical
	replace  coarse_`occvar' = 8 if inrange(`occvar', 33, 39)   // sales
	replace  coarse_`occvar' = 4 if inrange(`occvar', 41, 49)   // skilled
	replace  coarse_`occvar' = 5 if `occvar' == 51              // semi-skilled
	replace  coarse_`occvar' = 7 if inrange(`occvar', 61, 69)   // service
	replace  coarse_`occvar' = 0 if `occvar' == 71 | inrange(`occvar', 81, 89) // farm laborer and operators
	replace  coarse_`occvar' = 6 if inrange(`occvar', 78, 79)   // non-farm laborers
}

* Coarsen father occ
* Attach the crosswalk's coarse code (fatheroccej) to each respondent,
* matching on the raw fatherocc code.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1960_ANES.dta"
	* Stop the do-file if any non-missing fatherocc code has no crosswalk row
	assert fatherocc==. if _merge==1
	* Remove crosswalk codes that no respondent reports
	drop if _merge==2
	drop _merge

* Coarsen household head occ and respondent
* The same crosswalk is reused: in a temporary copy its key is renamed to
* occHH (then occR) and its coarse code to occHH_ej (occR_ej), so the merge
* adds those coarse codes to the survey data.
foreach x in HH R {
	* preserve/restore sets the survey data aside while the crosswalk copy is built
	preserve
	use "../Crosswalks/Crosswalk_1960_ANES.dta", clear
	rename fatherocc occ`x'
	rename fatheroccej occ`x'_ej

	tempfile cross1
	save `cross1'
	restore 

	* Codes 98 and 19 are blanked before matching; unmatched rows must then
	* have a missing key or the assert below fails.
	* NOTE(review): presumably 98/19 are absent from the crosswalk -- confirm.
	replace occ`x'=. if occ`x'==98 | occ`x'==19
	merge m:1 occ`x' using `cross1'
	assert occ`x'==. if _merge==1
	drop if _merge==2
	drop _merge
	}

/* The data do not allow dummies for whether the hh head was dad, mom, other
   male, or other female when R was growing up. What can be built is
   headofhh_father_imputed: every obs with a non-missing coarse father
   occupation is assumed to have had the father as head of hh when growing up.
   The variable is 1 for those obs and missing otherwise.
*/
generate headofhh_father_imputed = cond(fatheroccej != ., 1, .)

** DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING)
* 1 = raw father code 91-95 (unemployed, student, other general unemployed, retired)
* 0 = otherwise, when the coarse father code lies in 1-81
* . = everything else
generate father_notworking = cond(inrange(fatherocc, 91, 95), 1, ///
	cond(inrange(fatheroccej, 1, 81), 0, .))
tabulate father_notworking, missing

* mother_notworking--not available.

* Construct consistent, continuous education variable (years of schooling).
* Every condition names educationR in full. The earlier spelling "education" only
* worked through variable-name abbreviation, which fails under -set varabbrev off-
* or as soon as a second variable starting with "education" exists.
gen yrsschool=.
replace yrsschool = 0 if educationR==0 & yrsschool==.
replace yrsschool = 1 if educationR==11 & yrsschool==.
replace yrsschool = 2 if educationR==12 & yrsschool==.
replace yrsschool = 3 if educationR==13 & yrsschool==.
replace yrsschool = 4 if educationR==14 & yrsschool==.
replace yrsschool = 5 if educationR==15 & yrsschool==.
replace yrsschool = 6 if educationR==16 & yrsschool==.
replace yrsschool = 7 if educationR==17 & yrsschool==.
replace yrsschool = 8 if educationR==21 & yrsschool==.
replace yrsschool = 9 if (educationR==31 | educationR==41) & yrsschool==.
replace yrsschool = 10 if (educationR==32 | educationR==42) & yrsschool==.
replace yrsschool = 11 if (educationR==33 | educationR==43)  & yrsschool==.
replace yrsschool = 12 if (educationR==50 | educationR==51 | educationR==61) & yrsschool==. 
replace yrsschool = 14 if (educationR==71) & yrsschool==.
replace yrsschool = 16 if educationR==81 & yrsschool==.
replace yrsschool = 17 if inrange(educationR,82,86) & yrsschool==.
replace yrsschool =. if inrange(educationR,98,99) /*inap, DK, NA*/
tab educationR yrsschool, m

** Construct consistent (categorical) education variable
* NOTE(review): educationR==50 gets yrsschool=12 above but no eduR category, and codes
* 83-86 get yrsschool=17 but no eduR category here -- confirm this is intended.
gen eduR=.
replace eduR=0 if educationR==0
replace eduR=1 if educationR>=11 & educationR<=17 //less than grade school
replace eduR=2 if educationR==21 //grade school
replace eduR=3 if (educationR>=31 & educationR<=33) | (educationR>=41 & educationR<=43) //some high school
replace eduR=4 if educationR==51  //HS degree
replace eduR=5 if educationR==61 | educationR==71 //some college/ hs + technical training
replace eduR=6 if educationR==81 | educationR==82 //college degree
tab eduR,m 

** Family income in dollars, comparable across survey years.
** Each raw bracket code is mapped to one dollar figure; the lowest bracket is
** coded 0.75 x 1,000 and the highest 1.25 x 15,000.
gen fam_inc = .
replace fam_inc =   750 if fam_inc_raw==0
replace fam_inc =  1500 if fam_inc_raw==10
replace fam_inc =  2500 if fam_inc_raw==20
replace fam_inc =  3500 if fam_inc_raw==30
replace fam_inc =  4500 if fam_inc_raw==40
replace fam_inc =  5500 if fam_inc_raw==50
replace fam_inc =  6750 if fam_inc_raw==60
replace fam_inc =  8750 if fam_inc_raw==70
replace fam_inc = 12500 if fam_inc_raw==80
replace fam_inc = 18750 if fam_inc_raw==90

//note: the "_son" suffix only mirrors the variable names used in other datasets; male and female respondents alike get a value.
gen bottomcoded_son = (fam_inc==750)   if fam_inc<.
gen topcoded_son    = (fam_inc==18750) if fam_inc<.

** Father foreign born: 0 by default, missing for origin codes 91/99, and 1 when
** either the father's origin code or R's own birthplace code is in a foreign range.
gen fatherforeign = 0
replace fatherforeign = . if inlist(origin,91,99)
replace fatherforeign = 1 if inrange(fatherorigin,209,508) | inrange(placeborn,209,699)

* Respondent foreign born (left missing when placeborn is 999 or missing)
gen foreignborn = (placeborn>200) if placeborn<999

* State where R grew up, derived from placegrew
gen stategrow = placegrew
* grew up in a different country (55)
replace stategrow = . if placegrew>200
* only a region, not a state, is recorded (35)
replace stategrow = . if inlist(placegrew,109,119,129,138,139,150,158,159,169,178,179,182,192,198,199)
* subtracting 100 makes this code match the state codes
replace stategrow = stategrow - 100
label var stategrow "State where R grew up"

* Public-sector indicator: 1 for teachers and govt employees (occR==3 or indR==40),
* missing when the industry code is 91 or 99, and 0 otherwise.
gen publicind = cond(occR==3 | indR==40, 1, cond(inlist(indR,91,99), ., 0))

* Relate clean up: code 9 becomes missing
replace relate = . if relate==9

* Where R was raised, four-way split (there is no suburb category)
gen cityfarm = 1 if raised==10                              //farm
replace cityfarm = 2 if raised==20                          //small town
replace cityfarm = 3 if inrange(raised,30,50) | raised==80  //small city
replace cityfarm = 4 if inrange(raised,60,70) | raised==90  //large city

* Three-way split: farm / small town / any city
gen cityfarm2 = 1 if raised==10
replace cityfarm2 = 2 if raised==20
replace cityfarm2 = 3 if inrange(raised,30,90)

* Urban dummy: 0 = farm or small town, 1 = any city code
gen urban = 0 if inlist(raised,10,20)
replace urban = 1 if inrange(raised,30,90)

* Report (but do not enforce) uniqueness of the constructed id; this command only prints a table.
duplicates report id_anes //no duplicates
* Drop the id-construction helpers; *id_temp matches every variable whose name ends in id_temp.
* ageR1956/ageR1958 are created earlier in this section, outside this excerpt.
drop stryear *id_temp id ageR1956 ageR1958
sort id_anes

* Hold the cleaned 1960 data in a temporary file for later use in this do-file.
tempfile ANES60 
save `ANES60' 


********************************************************************************
********************************************************************************

**********************
*** 1964 
**********************

* Load the raw 1964 file and tag every observation with the survey year.
use "./RawData/NES1964.dta", clear //download data from ANES website 
gen year=1964

* Keep only the raw variables that are renamed and used below.
keep year V640002 V640012 V640466 V640259 V640182-V640217 V640252 V640261 V640189 V640146 ///
V640270 V640269 V640254 V640253 V640228 V640263 V640188 V640255 V640257 V640184 V640097 V640100 V640104 V640105 V640108 V640111 V640115 V640116 V640125 V640126 V640127 V640156 V640286 V640408 V640151 V640153 V640250

** Father's occupation
rename V640259 fatherocc

* Flag the first observation of each fatherocc value and count the flags,
* i.e. display the number of distinct father-occupation codes.
* Note that bysort leaves the data sorted by fatherocc.
bysort fatherocc: gen nvals = _n ==1
count if nvals 

** Weight
rename V640466 weight 

* Rename other variables
* Raw ANES variable names (V64xxxx) are mapped to the analysis names shared across survey years.
	* Identifiers, demographics and family background
	rename V640002 id
	rename V640012 state
	rename V640182 sex
	rename V640183 race
	rename V640187 ageR
	rename V640196 educationR
	rename V640252 placeborn
	rename V640261 placegrew
	rename V640263 raised
	rename V640253 origin
	rename V640269 fam_inc_raw
	rename V640270 famincHH
	rename V640254 fatherorigin
	rename V640255 motherorigin
	rename V640228 union_raw
	rename V640185 relate
	* Occupation/industry of respondent (R) and household head (HH); the old* versions
	* are the fallback values used below for the unemployed, on strike, or retired
	rename V640202 occR
	rename V640205 oldoccR
	rename V640209 occHH
	rename V640212 oldoccHH
	rename V640203 indR
	rename V640206 oldindR
	rename V640210 indHH
	rename V640213 oldindHH
	* Household composition
	rename V640188 marital
	rename V640257 nationality
	rename V640189 R_totnumkids_0to17_livinginhh 
	rename V640184 number_adults
	* Party identification, vote and opinion items (meanings inferred from the names; see codebook)
	rename V640146 R_partyid_detailed
	rename V640151 Rfather_partyid
	rename V640153 Rmother_partyid
	rename V640156 vote60
	rename V640097 govtact_ftblacks
	rename V640100 govtact_integschls
	rename V640104 hmchange_blackpos
	rename V640105 speed_civrightsmovt
	rename V640108 whiteRop_fintegschl
	rename V640111 blackRsop_integschl
	rename V640115 busing
	rename V640116 Rop_housinginteg
	rename V640125 locblacks_favdeseg
	rename V640126 locwhites_favstrseg
	rename V640127 Rop_degreeraceseg
	rename V640286 vote64
	rename V640408 govtact_fpubaccom
	rename V640250 thermometer_blacks 
	
/*
    Note: Construct unique id. Per ANES documentation 
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
    this is done by combining year + id variable. 
*/
* Step 1: string copies of year and id.
tostring year id, gen(stryear strid_temp)
* Step 2: concatenate them, year first (e.g. year 1964 and id 12 would give "196412";
* id is not zero-padded here).
egen id_temp = concat(stryear strid_temp)
* Step 3: convert the concatenated string back to a numeric id.
destring id_temp, gen(id_anes)

* Age: code 0 is treated as missing; also build the squared term
replace ageR = . if ageR==0
gen agesq = ageR^2

* Race: keep only 1 = white and 2 = Black; every other code becomes missing
replace race = . if race>2
tab race, m

* Children aged 0-17 living in R's household: code 9 becomes missing
replace R_totnumkids_0to17_livinginhh = . if R_totnumkids_0to17_livinginhh==9

* Code 8 stands for "8 or more", so the exact count is unknown; flag those cases
gen flag_8plus_kidsunder18_inhh = (R_totnumkids_0to17_livinginhh==8) if R_totnumkids_0to17_livinginhh!=.
tab flag_8plus_kidsunder18_inhh, m
label var flag_8plus_kidsunder18_inhh "Dummy =1 if R has indeterminate (8+) # of kids under 18 in R's hh"

* Adults in the household: code 9 becomes missing
replace number_adults = . if number_adults==9

* Code 6 stands for "6 or more" adults; flag those cases as well
gen flag_6plus_adultsinhh = (number_adults==6) if number_adults<.
tab flag_6plus_adultsinhh, m
label var flag_6plus_adultsinhh "Dummy =1 if there's an indeterminate (6+) # of adults in R's hh"

* Household size = children + adults, defined only when both counts are known
gen R_hhsize_plusR = number_adults + R_totnumkids_0to17_livinginhh if (number_adults!=. & R_totnumkids_0to17_livinginhh!=.)
tab R_hhsize_plusR, m

* Same count without the respondent
gen R_hhsize_minusR = R_hhsize_plusR - 1
tab R_hhsize_minusR, m

label var R_hhsize_plusR "total # of persons in R's hh (including R)"
label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"

* Head-of-household dummy (missing when relate is 9 or missing)
gen head = (relate==1) if relate<9

* Hispanic dummy: 1 if any of the following codes applies, 0 otherwise
*   placeborn    182/219/229  Puerto Rico, Mexico, Central America
*   fatherorigin 219/259      Mexico, South America
*   motherorigin 219/259      Mexico, South America
*   nationality  182/219      Puerto Rico, Mexico
gen hispanic = inlist(placeborn,182,219,229) | inlist(fatherorigin,219,259) ///
	| inlist(motherorigin,219,259) | inlist(nationality,182,219)

* Respondent is the wife of the head (relate==2 and female)
gen wifeR = (sex==2 & relate==2) if relate<9

* Marital-status dummies, each missing when marital is 9 or missing.
* The two lists are parallel: dummy name <-> marital code.
local mstat_names married widowed divorced separated never_married
local mstat_codes 1 5 3 4 2
forvalues j = 1/5 {
	local mname : word `j' of `mstat_names'
	local mcode : word `j' of `mstat_codes'
	gen `mname' = (marital==`mcode') if marital<9
	tab `mname', m
}

* Union membership dummies; all are missing when union_raw is 8 or above
tab union_raw

* Anyone in the household is a member (any positive code below 8)
gen union_hh = (union_raw>0) if union_raw<8

* R and someone else in the household are members
gen union_Rplus = inlist(union_raw,3,5,7) if union_raw<8

* Anyone besides the respondent is a member
gen union_other = inrange(union_raw,2,7) if union_raw<8

* Employment dummies, built the same way for the respondent (no suffix) and the
* household head (suffix HH). All are missing when the occupation code is 99 or missing.
foreach who in R HH {
	local sfx = cond("`who'"=="HH", "HH", "")
	tab occ`who'
	gen employed`sfx' = (inrange(occ`who',1,89) | occ`who'==93) if occ`who'<99
	gen retired`sfx' = (occ`who'==95) if occ`who'<99
	gen laborforce`sfx' = (employed`sfx'==1 | inlist(occ`who',91,94)) if occ`who'<99
	gen hwstd`sfx' = inlist(occ`who',92,96) if occ`who'<99
}

* From here on occupation/industry are overwritten, so the dummies above must come first.

* occHH: use the job held before unemployment, strike, or retirement
replace occHH = oldoccHH if inlist(occHH,91,93,94,95)

* indR for the unemployed or on strike: fall back to the previous industry
* (this conditions on occR, so it has to precede the occR replacement)
replace indR = oldindR if inlist(occR,91,93,94) & indR==90 & oldindR<91

* occR for the unemployed or on strike: fall back to the previous occupation
replace occR = oldoccR if inlist(occR,91,93,94)

** Coarse occupation groups (Gallup numbering) for respondent and household head.
** The code ranges below do not overlap, so the order of the replaces is immaterial.
foreach occvar of varlist occR occHH {
	gen coarse_`occvar' = .
	replace coarse_`occvar' = 0 if `occvar'==71 | inrange(`occvar',81,89)  //farm laborer and operators
	replace coarse_`occvar' = 1 if inrange(`occvar',1,19)   //professional
	replace coarse_`occvar' = 2 if inrange(`occvar',21,29)  //managers
	replace coarse_`occvar' = 3 if inrange(`occvar',30,32)  //clerical
	replace coarse_`occvar' = 4 if inrange(`occvar',41,49)  //skilled
	replace coarse_`occvar' = 5 if `occvar'==51             //semi-skilled
	replace coarse_`occvar' = 6 if inrange(`occvar',78,79)  //non-farm laborers
	replace coarse_`occvar' = 7 if inrange(`occvar',61,69)  //service
	replace coarse_`occvar' = 8 if inrange(`occvar',33,39)  //sales
}

* Coarsen father occ
* Attach the crosswalk's coarsened code (fatheroccej) to each observation by fatherocc.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1964_ANES.dta"
	* Every unmatched master row must have a missing fatherocc; otherwise stop.
	assert fatherocc==. if _merge==1
	* Crosswalk rows with no respondent are not needed.
	drop if _merge==2
	drop _merge

* Coarsen household head occ and respondent
* The same crosswalk is reused: its key and output columns are renamed to
* occHH/occHH_ej (then occR/occR_ej) and saved to a temporary file for the merge.
foreach x in HH R {
	preserve
	use "../Crosswalks/Crosswalk_1964_ANES.dta", clear
	rename fatherocc occ`x'
	rename fatheroccej occ`x'_ej

	tempfile cross1
	save `cross1'
	restore 

	* Codes 98 and 19 are set to missing before merging, so they are exempt from the assert below.
	replace occ`x'=. if occ`x'==98 | occ`x'==19
	merge m:1 occ`x' using `cross1'
	assert occ`x'==. if _merge==1
	drop if _merge==2
	drop _merge
	}
	

/* The survey does not let us build separate dummies for who headed R's household
   while R was growing up (dad, mom, other male, other female). Instead we build
   headofhh_father_imputed: any observation with a non-missing coarsened father
   occupation is assumed to have had the father as head of household.
*/
gen headofhh_father_imputed = 1 if fatheroccej != .

** Dummy for cases where we know why the father's coarsened occupation is missing
** (no equivalent is available for mothers).
* fatherocc 91-95: unemployed, student, other general unemployed, retired
gen father_notworking = 1 if inrange(fatherocc,91,95)
replace father_notworking = 0 if father_notworking==. & inrange(fatheroccej,1,81)
tab father_notworking, m

//mother_notworking--not available.

* Years of schooling (continuous) from the categorical educationR code.
* The code sets below are mutually exclusive, so each observation is assigned at most once.
gen yrsschool = .
replace yrsschool = 0 if educationR==0
* codes 11-17 correspond to 1-7 years
forvalues g = 1/7 {
	replace yrsschool = `g' if educationR==(10 + `g')
}
replace yrsschool = 8  if educationR==21
replace yrsschool = 9  if inlist(educationR,31,41)
replace yrsschool = 10 if inlist(educationR,32,42)
replace yrsschool = 11 if inlist(educationR,33,43)
replace yrsschool = 12 if inlist(educationR,50,51,61)
replace yrsschool = 14 if educationR==71
replace yrsschool = 16 if educationR==81
replace yrsschool = 17 if inrange(educationR,82,86)
* inap, DK, NA
replace yrsschool = . if inrange(educationR,98,99)
tab yrsschool, m

** Construct consistent (categorical) education variable
* Every condition names educationR in full. The earlier "education==21" only worked
* through variable-name abbreviation, which fails under -set varabbrev off- or as
* soon as a second variable starting with "education" exists.
* NOTE(review): educationR==50 is given 12 years of schooling elsewhere in this
* section but receives no eduR category here -- confirm this is intended.
gen eduR=.
replace eduR=0 if educationR==0
replace eduR=1 if educationR>=11 & educationR<=17 //less than grade school
replace eduR=2 if educationR==21 //grade school
replace eduR=3 if (educationR>=31 & educationR<=33) | (educationR>=41 & educationR<=43) //some high school
replace eduR=4 if educationR==51  //HS degree
replace eduR=5 if educationR==61 | educationR==71 //some college/ hs + technical training
replace eduR=6 if educationR>=81 & educationR<98 //college degree
tab eduR,m 


** Family income in dollars, comparable across survey years.
** Each raw bracket code is mapped to one dollar figure; the lowest bracket is
** coded 0.75 x 1,000 and the highest 1.25 x 15,000.
gen fam_inc = .
replace fam_inc =   750 if fam_inc_raw==10
replace fam_inc =  1500 if fam_inc_raw==11
replace fam_inc =  2500 if fam_inc_raw==12
replace fam_inc =  3500 if fam_inc_raw==13
replace fam_inc =  4500 if fam_inc_raw==14
replace fam_inc =  5500 if fam_inc_raw==20
replace fam_inc =  6750 if fam_inc_raw==21
replace fam_inc =  8750 if fam_inc_raw==22
replace fam_inc = 12500 if fam_inc_raw==30
replace fam_inc = 18750 if fam_inc_raw==31

//note: the "_son" suffix only mirrors the variable names used in other datasets; male and female respondents alike get a value.
gen bottomcoded_son = (fam_inc==750)   if fam_inc<.
gen topcoded_son    = (fam_inc==18750) if fam_inc<.

** Father foreign born.
** Start at 0; set to missing for the unusable fatherorigin/origin codes; then set to 1
** when the father's origin, R's own birthplace, or the origin code is in a foreign range.
** The =1 rules run last, so they override the missing assignment.
gen fatherforeign = 0
replace fatherforeign = . if inlist(fatherorigin,998,999) ///
	| (fatherorigin==991 & race!=2 & placeborn<200) ///
	| inlist(origin,60,80,98,99)
replace fatherforeign = 1 if inrange(fatherorigin,209,711) | inrange(placeborn,209,704) ///
	| inlist(origin,30,40,70)

* Respondent foreign born (left missing when placeborn is 999 or missing)
gen foreignborn = (placeborn>200) if placeborn<999

* State where R grew up, derived from placegrew
gen stategrow = placegrew
* grew up in a different country
replace stategrow = . if placegrew>200
* only a region, not a state, is recorded
replace stategrow = . if inlist(placegrew,109,118,119,128,129,138,139,158,159,169,178,179,182) ///
	| inrange(placegrew,190,199)
* subtracting 100 makes this code match the state codes
replace stategrow = stategrow - 100
label var stategrow "State where R grew up"

* Public-sector indicator: 1 for govt employees (occR==3 or indR==40),
* missing when the industry code is 90 or 99, and 0 otherwise.
gen publicind = cond(occR==3 | indR==40, 1, cond(inlist(indR,90,99), ., 0))

* Where R was raised, four-way split (there is no suburb category)
gen cityfarm = 1 if raised==0                               //farm
replace cityfarm = 2 if raised==10                          //small town
replace cityfarm = 3 if inrange(raised,20,40) | raised==70  //small city
replace cityfarm = 4 if inrange(raised,50,60) | raised==80  //large city

* Three-way split: farm / small town / any city
gen cityfarm2 = 1 if raised==0
replace cityfarm2 = 2 if raised==10
replace cityfarm2 = 3 if inrange(raised,20,80)

* Urban dummy: 0 = farm or small town, 1 = any city code
gen urban = 0 if inlist(raised,0,10)
replace urban = 1 if inrange(raised,20,80)

* Report (but do not enforce) uniqueness of the constructed id; this command only prints a table.
duplicates report id_anes //no duplicates
* Drop the id-construction helpers; *id_temp matches both strid_temp and id_temp.
drop stryear *id_temp id 
sort id_anes

* Hold the cleaned 1964 data in a temporary file for later use in this do-file.
tempfile ANES64 
save `ANES64' 

********************************************************************************
********************************************************************************

*********************
*** 1966 
*********************

* Load the raw 1966 file and tag every observation with the survey year.
use "./RawData/NES1966.dta", clear //download data from ANES website 
gen year=1966

* Keep only the raw variables that are renamed and used below
* (a few names are listed more than once).
keep year V660002 V660010 V660193-V660203 V660223 V660228 V660230 V660225 V660226 V660014 V660015 ///
V660235-V660238 V660224 V660225 V660215 V660232 V660192 V660193 V660227 V660194 V660082 V660085 V660130 V660152 V660042 V660043 V660025 V660028

** No weights in this cross-section
* A constant weight of 1 keeps the variable available for every year.
gen weight=1

** Father's occupation
rename V660228 fatherocc

* Flag the first observation of each fatherocc value and count the flags,
* i.e. display the number of distinct father-occupation codes.
* Note that bysort leaves the data sorted by fatherocc.
bysort fatherocc: gen nvals = _n ==1
count if nvals 

* Rename other variables 
* Raw ANES variable names (V66xxxx) are mapped to the analysis names shared across survey years.
* Only household-head occupation, industry and employment status are renamed for this year.
	rename V660002 id
	rename V660010 state
	* Adult counts; the pfu/sfu codes are converted to head counts further below
	rename V660014 number_adults_pfu
	rename V660015 number_adults_sfu
	rename V660197 educationR
	rename V660223 placeborn
	rename V660235 fam_inc_raw  
	rename V660236 sex
	rename V660237 race
	rename V660224 origin
	rename V660225 fatherorigin
	rename V660226 matrilineal
	rename V660215 union_raw
	rename V660201 occHH
	rename V660202 indHH
	rename V660199 empstatHH
	rename V660238 relate
	rename V660230 placegrew
	rename V660232 raised
	rename V660192 ageR
	rename V660193 marital
	rename V660194 R_totnumkids_0to17_livinginhh 
	* Party identification, vote and opinion items (meanings inferred from the names; see codebook)
	rename V660082 R_partyid_detailed
	rename V660085 vote64
	rename V660130 thermometer_blacks
	rename V660152 Rop_openhousing
	rename V660042 hmchange_blackpos
	rename V660043 speed_civrightsmovt
	rename V660025 govtact_integschls
	rename V660028 Rop_schoolinteg

/*
    Note: Construct unique id. Per ANES documentation 
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
    this is done by combining year + id variable. 
*/
* Step 1: string copies of year and id.
tostring year id, gen(stryear strid_temp)
* Step 2: concatenate them, year first (id is not zero-padded here).
egen id_temp = concat(stryear strid_temp)
* Step 3: convert the concatenated string back to a numeric id.
destring id_temp, gen(id_anes)

* Variable for head of household
* 1 if relate==1, 0 for other codes below 9, missing when relate is 9 or missing.
gen head = relate==1 if relate<9

* Fix race variable
* Raw race codes 3 and 4 are folded into white (1) just below, so capture them first:
* the Hispanic indicator further down is defined from the RAW codes. If hispanic is
* generated after the recode, race==3 | race==4 can never be true and the race-based
* part of the indicator is always 0.
* (Codes 3/4 are presumably the survey's Hispanic-origin categories -- confirm in codebook.)
gen byte hisp_fromrace = race==3 | race==4
replace race =1 if race==3 | race==4 
replace race =. if race>2 
tab race,m //left with white and Black

* Children variable (code 9 becomes missing)
replace R_totnumkids_0to17_livinginhh=. if R_totnumkids_0to17_livinginhh==9

//flag indeterminate (8+) # of kids under 18 in hh
gen flag_8plus_kidsunder18_inhh = (R_totnumkids_0to17_livinginhh==8) if R_totnumkids_0to17_livinginhh!=.
tab flag_8plus_kidsunder18_inhh,m 
label var flag_8plus_kidsunder18_inhh "Dummy =1 if R has indeterminate (8+) # of kids under 18 in R's hh"

* Number of adults in hh
* The pfu/sfu codes are translated into head counts; number_primary stays missing
* for any code not listed, while number_secondary defaults to 0.
	gen number_primary=.
	replace number_primary=1 if number_adults_pfu==1 | number_adults_pfu==2
	replace number_primary=2 if number_adults_pfu==3 | number_adults_pfu==6 | number_adults_pfu==8
	replace number_primary=3 if number_adults_pfu==4 | number_adults_pfu==7 | number_adults_pfu==9
	replace number_primary=4 if number_adults_pfu==5
	tab number_primary,m 

	gen number_secondary =0 
	replace number_secondary=1 if number_adults_sfu==1
	replace number_secondary=2 if number_adults_sfu==2 | number_adults_sfu==3
	replace number_secondary=3 if number_adults_sfu==6
	replace number_secondary=4 if number_adults_sfu==8
	tab number_secondary,m

	* Household size = children + adults; missing whenever the child count or number_primary is missing
	gen R_hhsize_plusR = R_totnumkids_0to17_livinginhh + number_primary + number_secondary if R_totnumkids_0to17_livinginhh!=. 
	tab R_hhsize_plusR, m
	
	gen R_hhsize_minusR = R_hhsize_plusR -1
	tab R_hhsize_minusR,m 
	
	label var R_hhsize_plusR "total # of persons in R's hh (including R)"
	label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"		

* Clean up ageR (code 99 becomes missing)
replace ageR=. if ageR==99
gen agesq = ageR * ageR

* Hispanic variable 
* Starts from the raw race codes captured before the recode, then adds the origin-based rules.
gen hispanic = hisp_fromrace
drop hisp_fromrace
replace hispanic=1 if placeborn==219 | placeborn==249 | placeborn==182 //mexico, south america, Puerto Rico
replace hispanic=1 if fatherorigin==219 | fatherorigin==249 | fatherorigin==182
replace hispanic=1 if matrilineal==219 | matrilineal==249 | matrilineal==182

* Marital-status dummies, each missing when marital is 9 or missing.
* The two lists are parallel: dummy name <-> marital code.
local mstat_names married widowed divorced separated never_married
local mstat_codes 1 5 3 4 2
forvalues j = 1/5 {
	local mname : word `j' of `mstat_names'
	local mcode : word `j' of `mstat_codes'
	gen `mname' = (marital==`mcode') if marital<9
	tab `mname', m
}

* Respondent is the wife of the head (relate==2 and female)
gen wifeR = (sex==2 & relate==2) if relate<9

** Only household-level employment information exists this year, so there are no
** respondent employment dummies. All HH dummies are missing when empstatHH is 9 or missing.
gen employedHH   = (empstatHH==1)          if empstatHH<9
gen laborforceHH = inlist(empstatHH,1,2)   if empstatHH<9
gen retiredHH    = inlist(empstatHH,3,4)   if empstatHH<9 //retired and permanently disabled
gen hwstdHH      = inlist(empstatHH,5,6)   if empstatHH<9

* Union membership dummies; all are missing when union_raw is 8 or above
tab union_raw

* Anyone in the household is a member (any positive code below 8)
gen union_hh = (union_raw>0) if union_raw<8

* The respondent is a member
gen unionR = inlist(union_raw,1,3,5,7) if union_raw<8

* Anyone besides the respondent is a member
gen union_other = inrange(union_raw,2,7) if union_raw<8

* R and someone else in the household are members
gen union_Rplus = inlist(union_raw,3,5,7) if union_raw<8

** Coarse occupation groups (Gallup numbering) for the household head.
** This year's occupation codes have three digits. The ranges below do not overlap,
** so the order of the replaces is immaterial.
foreach occvar of varlist occHH {
	gen coarse_`occvar' = .
	replace coarse_`occvar' = 0 if inrange(`occvar',710,712) | inrange(`occvar',810,850)  //farm laborer and operators
	replace coarse_`occvar' = 1 if inrange(`occvar',101,160)  //professional
	replace coarse_`occvar' = 2 if inrange(`occvar',201,275)  //managers
	replace coarse_`occvar' = 3 if inrange(`occvar',301,328)  //clerical
	replace coarse_`occvar' = 4 if inrange(`occvar',401,473)  //skilled
	replace coarse_`occvar' = 5 if inrange(`occvar',501,556)  //semi-skilled (operatives etc)
	replace coarse_`occvar' = 6 if inrange(`occvar',720,730)  //non-farm laborers
	replace coarse_`occvar' = 7 if inrange(`occvar',610,681)  //service
	replace coarse_`occvar' = 8 if inrange(`occvar',350,390)  //sales
}

* Coarsen father occ
* Attach the crosswalk's coarsened code (fatheroccej) to each observation by fatherocc.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1966_ANES.dta"
	* Every unmatched master row must have a missing fatherocc; otherwise stop.
	assert fatherocc==. if _merge==1
	* Crosswalk rows with no respondent are not needed.
	drop if _merge==2
	drop _merge

* Coarsen household head occ and respondent
* Only the household head (HH) is processed for this year. The same crosswalk is reused:
* its key and output columns are renamed to occHH/occHH_ej and saved to a temporary file.
foreach x in HH  {
	preserve
	use "../Crosswalks/Crosswalk_1966_ANES.dta", clear
	rename fatherocc occ`x'
	rename fatheroccej occ`x'_ej

	tempfile cross1
	save `cross1'
	restore 

	* NOTE(review): 98 and 19 are the same values blanked in the 1960/1964 sections, but this
	* year's occupation codes have three digits -- confirm these are the right codes here.
	replace occ`x'=. if occ`x'==98 | occ`x'==19
	merge m:1 occ`x' using `cross1'
	assert occ`x'==. if _merge==1
	drop if _merge==2
	drop _merge
	}

/* The survey does not let us build separate dummies for who headed R's household
   while R was growing up (dad, mom, other male, other female). Instead we build
   headofhh_father_imputed: any observation with a non-missing coarsened father
   occupation is assumed to have had the father as head of household.
*/
gen headofhh_father_imputed = 1 if fatheroccej != .

** Dummy for cases where we know why the father's coarsened occupation is missing
** (no equivalent is available for mothers).
* fatherocc 910-960 are treated as the not-working codes this year
gen father_notworking = 1 if inrange(fatherocc,910,960)
replace father_notworking = 0 if father_notworking==. & inrange(fatheroccej,1,81)
tab father_notworking, m

//mother_notworking--not available.

* Years of schooling (continuous) from the categorical educationR code.
* The code sets below are mutually exclusive, so each observation is assigned at most once.
gen yrsschool = .
replace yrsschool = 0 if educationR==0
* codes 11-16 correspond to 1-6 years
forvalues g = 1/6 {
	replace yrsschool = `g' if educationR==(10 + `g')
}
replace yrsschool = 7  if inlist(educationR,17,18)
replace yrsschool = 8  if inlist(educationR,21,22)
replace yrsschool = 9  if inlist(educationR,31,41)
replace yrsschool = 10 if inlist(educationR,32,42)
replace yrsschool = 11 if inlist(educationR,33,43)
replace yrsschool = 12 if inlist(educationR,50,51,61)
replace yrsschool = 14 if educationR==71
replace yrsschool = 16 if educationR==81
replace yrsschool = 17 if inrange(educationR,82,86)
* inap, DK, NA
replace yrsschool = . if inrange(educationR,98,99)
tab yrsschool, m

** Construct consistent (categorical) education variable
* Every condition names educationR in full. The earlier "education==21 | education==22"
* only worked through variable-name abbreviation, which fails under -set varabbrev off-
* or as soon as a second variable starting with "education" exists.
* NOTE(review): educationR==50 is given 12 years of schooling elsewhere in this
* section but receives no eduR category here -- confirm this is intended.
gen eduR=.
replace eduR=0 if educationR==0
replace eduR=1 if educationR>=11 & educationR<=18 //less than grade school
replace eduR=2 if educationR==21 | educationR==22 //grade school
replace eduR=3 if (educationR>=31 & educationR<=33) | (educationR>=41 & educationR<=43) //some high school
replace eduR=4 if educationR==51  //HS degree
replace eduR=5 if educationR==61 | educationR==71 //some college/ hs + technical training
replace eduR=6 if educationR>=81 & educationR<87 //college degree
tab eduR,m 


** Family income in dollars, comparable across survey years.
** Each raw bracket code is mapped to one dollar figure; the lowest bracket is
** coded 0.75 x 1,000 and the highest 1.25 x 25,000.
gen fam_inc = .
replace fam_inc =   750 if fam_inc_raw==10
replace fam_inc =  1500 if fam_inc_raw==11
replace fam_inc =  2500 if fam_inc_raw==12
replace fam_inc =  3500 if fam_inc_raw==13
replace fam_inc =  4500 if fam_inc_raw==14
replace fam_inc =  6250 if fam_inc_raw==20
replace fam_inc =  8750 if fam_inc_raw==21
replace fam_inc = 12500 if fam_inc_raw==30
replace fam_inc = 20000 if fam_inc_raw==31
replace fam_inc = 31250 if fam_inc_raw==32

//note: the "_son" suffix only mirrors the variable names used in other datasets; male and female respondents alike get a value.
gen bottomcoded_son = (fam_inc==750)   if fam_inc<.
gen topcoded_son    = (fam_inc==31250) if fam_inc<.

** Father foreign born.
** Start at 0; set to missing for the unusable fatherorigin/origin codes; then set to 1
** when the father's origin, R's own birthplace, or the origin code is in a foreign range.
** The =1 rule runs last, so it overrides the missing assignment.
* NOTE(review): the placeborn range starts at 208 while fatherorigin starts at 209 -- confirm.
gen fatherforeign = 0
replace fatherforeign = . if inlist(fatherorigin,998,999) | inlist(origin,8,9)
replace fatherforeign = 1 if inrange(fatherorigin,209,431) | inrange(placeborn,208,428) ///
	| inlist(origin,3,4)

* Respondent foreign born (left missing when placeborn is 999 or missing)
gen foreignborn = (placeborn>200) if placeborn<999

* State where R grew up, derived from placegrew
gen stategrow = placegrew
* grew up in a different country
replace stategrow = . if placegrew>200
* only a region, not a state, is recorded
replace stategrow = . if inlist(placegrew,109,118,119,129,138,139,158,169,178,179) ///
	| inrange(placegrew,182,199)
* subtracting 100 makes this code match the state codes
replace stategrow = stategrow - 100
label var stategrow "State where R grew up"

* Public-sector indicator for the household head: 1 when indHH==40, or when indHH==32
* and occHH is 142 or 126; missing when indHH is 91 or 99; 0 otherwise.
gen publicindHH = cond((indHH==32 & inlist(occHH,142,126)) | indHH==40, 1, ///
	cond(inlist(indHH,91,99), ., 0))

* Where R was raised, four-way split
gen cityfarm = 1 if raised==1       //farm
replace cityfarm = 2 if raised==2   //small town
replace cityfarm = 3 if raised==3   //small city
replace cityfarm = 4 if raised==4   //large city

* Three-way split: farm / small town / city (code 6 is a city of unspecified size)
gen cityfarm2 = 1 if raised==1
replace cityfarm2 = 2 if raised==2
replace cityfarm2 = 3 if inlist(raised,3,4,6)

* Urban dummy: 0 = farm or small town, 1 = codes 3 through 6
gen urban = 0 if inlist(raised,1,2)
replace urban = 1 if inrange(raised,3,6)

* Report (but do not enforce) uniqueness of the constructed id; this command only prints a table.
duplicates report id_anes //no duplicates
* Drop the id-construction helpers; *id_temp matches both strid_temp and id_temp.
drop stryear *id_temp id 
sort id_anes

* Hold the cleaned 1966 data in a temporary file for later use in this do-file.
tempfile ANES66 
save `ANES66' 


********************************************************************************
********************************************************************************

********************
*** 1968 
********************

* Load the raw 1968 file and tag every observation with the survey year.
use "./RawData/NES1968.dta", clear //download data from ANES website 
gen year=1968

* Keep only the raw variables that are renamed and used below
* (a few names are listed more than once).
keep year V680002 V680003 V680014 V680146-V680154 V680156-V680162 V680201 V680246 V680018 V680019 ///
V680166-V680170 V680174 V680175 V680190-V680193 V680243 V680259 V680146 V680147 V680120 ///
V680252 V680257 V680261 V680263-V680265 V680525 V680533 V680244 V680245 V680247 V680248 V680073 V680075 V680078 V680080 V680081 V680084 V680086 V680087 V680088 V680124 V680126 V680129 V680240 V680316

* Father's occupation
rename V680252 fatherocc

* Flag the first observation of each fatherocc value and count the flags,
* i.e. display the number of distinct father-occupation codes.
* Note that bysort leaves the data sorted by fatherocc.
bysort fatherocc: gen nvals = _n ==1
count if nvals 

** Weight
rename V680003 weight 

* Rename variables
* Raw ANES variable names (V68xxxx) are mapped to the analysis names shared across survey years.
	rename V680002 id
	rename V680014 state
	* Adult counts; the pfu/sfu codes are converted to head counts further below
	rename V680018 number_adults_pfu
	rename V680019 number_adults_sfu
	rename V680156 educationR
	rename V680160 empstatR
	rename V680168 empstatHH
	rename V680243 placeborn
	rename V680244 origin
	rename V680245 fatherorigin
	rename V680246 motherorigin
	rename V680261 fam_inc_raw
	rename V680263 sex 
	rename V680264 race
	rename V680533 ageR
	rename V680201 union_raw
	* Occupation/industry of respondent (R) and household head (HH)
	rename V680161 occR
	rename V680169 occHH
	rename V680162 indR
	rename V680170 indHH
	rename V680265 relate
	rename V680257 placegrew
	rename V680259 raised
	rename V680146 marital
	rename V680247 fatherside
	rename V680248 motherside
	rename V680147 R_totnumkids_0to17_livinginhh 
	* Party identification, vote and opinion items (meanings inferred from the names; see codebook)
	rename V680120 R_partyid_detailed
	rename V680124 Rfather_partyid
	rename V680126 Rmother_partyid
	rename V680129 vote64
	rename V680316 vote68
	rename V680073 govtact_ftblacks
	rename V680075 govtact_integschls
	rename V680078 govtact_fpubaccom
	rename V680080 hmchange_blackpos
	rename V680081 speed_civrightsmovt
	rename V680084 Rop_housinginteg
	rename V680086 locblacks_favdeseg
	rename V680087 locwhites_favstrseg
	rename V680088 Rop_degreeraceseg
	rename V680240 thermometer_blacks
	
/*
    Note: Construct unique id. Per ANES documentation 
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf), 
    this is done by combining year + id variable. 
*/
* Step 1: string copies of year and id.
tostring year id, gen(stryear strid_temp)
* Step 2: concatenate them, year first (id is not zero-padded here).
egen id_temp = concat(stryear strid_temp)
* Step 3: convert the concatenated string back to a numeric id.
destring id_temp, gen(id_anes)

* Variable for head of household
* 1 if relate==1, 0 for other codes below 9, missing when relate is 9 or missing.
gen head = relate==1 if relate<9

* Fix race variable
* Raw race codes 3 and 4 are folded into white (1) just below, so capture them first:
* the Hispanic indicator further down is defined from the RAW codes. If hispanic is
* generated after the recode, race==3 | race==4 can never be true and the race-based
* part of the indicator is always 0.
* (Codes 3/4 are presumably the survey's Hispanic-origin categories -- confirm in codebook.)
gen byte hisp_fromrace = race==3 | race==4
replace race =1 if race==3 | race==4 
replace race =. if race>2 
tab race,m //left with white and Black

* # of children 0-17 living in R's hh (code 9 becomes missing)
replace R_totnumkids_0to17_livinginhh=. if R_totnumkids_0to17_livinginhh==9

//flag indeterminate (8+) # of kids under 18 in hh
gen flag_8plus_kidsunder18_inhh = (R_totnumkids_0to17_livinginhh==8) if R_totnumkids_0to17_livinginhh!=.

tab flag_8plus_kidsunder18_inhh,m 
label var flag_8plus_kidsunder18_inhh "Dummy =1 if R has indeterminate (8+) # of kids under 18 in R's hh"

* # of adults in hh
* The pfu/sfu codes are translated into head counts; number_primary stays missing
* for any code not listed, while number_secondary defaults to 0.
	gen number_primary=.
	replace number_primary=1 if number_adults_pfu==1 | number_adults_pfu==2
	replace number_primary=2 if number_adults_pfu==3 | number_adults_pfu==6 | number_adults_pfu==8
	replace number_primary=3 if number_adults_pfu==4 | number_adults_pfu==7 | number_adults_pfu==9
	replace number_primary=4 if number_adults_pfu==5

	gen number_secondary = 0 
	replace number_secondary=1 if number_adults_sfu==1
	replace number_secondary=2 if number_adults_sfu==3	
	replace number_secondary=3 if number_adults_sfu==4 | number_adults_sfu==6
	replace number_secondary=4 if number_adults_sfu==8

* # of persons in hh
* Household size = children + adults; missing whenever the child count or number_primary is missing
	gen R_hhsize_plusR = R_totnumkids_0to17_livinginhh + number_primary + number_secondary if R_totnumkids_0to17_livinginhh!=.
	tab R_hhsize_plusR, m
	
	gen R_hhsize_minusR = R_hhsize_plusR -1
	tab R_hhsize_minusR,m 
	
	label var R_hhsize_plusR "total # of persons in R's hh (including R)"
	label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"	
	
* Variable for marriage
* Each dummy is missing when marital is 9 or missing.
gen married = marital==1 if marital<9
tab married, m

gen widowed = marital==5 if marital<9
tab widowed, m

gen divorced = marital==3 if marital<9
tab divorced, m

gen separated = marital==4 if marital<9
tab separated, m

gen never_married = marital==2 if marital<9 
tab never_married, m

* Variable for spouse (relate==2 and female)
gen wifeR = relate==2 & sex==2 if relate<9 

* Age clean-up (code 99 becomes missing)
replace ageR=. if ageR==99
gen agesq = ageR * ageR

* Hispanic variable
* Starts from the raw race codes captured before the recode; the origin-based
* rules that follow switch additional observations to 1.
gen hispanic = hisp_fromrace
drop hisp_fromrace
replace hispanic=1 if placeborn==182 | placeborn==219 | placeborn==227 | placeborn==246 
replace hispanic=1 if fatherorigin==219 | fatherorigin==182 
replace hispanic=1 if motherorigin==219 | motherorigin==182
replace hispanic=1 if fatherside==219 | motherside==219 

* Union variables (all four are missing when union_raw is 8 or above)
tab union_raw

* Household indicator: 1 for any positive code below 8, 0 otherwise
gen union_hh = (union_raw > 0) if union_raw < 8

* Respondent is in a union: odd codes 1, 3, 5, 7
gen unionR = inlist(union_raw, 1, 3, 5, 7) if union_raw < 8

* Anyone besides the respondent is in a union: codes 2 through 7
gen union_other = inrange(union_raw, 2, 7) if union_raw < 8

* Respondent and someone else in the household: codes 3, 5, 7
gen union_Rplus = inlist(union_raw, 3, 5, 7) if union_raw < 8

* Employment variables
	* Respondent: code 7 ("other") is set to missing first, so every dummy
	* below is missing for those cases as well as for codes 8 and above
	tab empstatR
	replace empstatR=. if empstatR==7 //"other"
	gen employed= empstatR==1 if empstatR<8
	gen retired = empstatR==3 | empstatR==4 if empstatR<8 //includes permanently disabled
	gen laborforce = employed==1 | empstatR==2 if empstatR<8 
	gen hwstd = empstatR==5 | empstatR==6 if empstatR<8

	* Household head: every dummy is conditioned on the head's own status
	* (employedHH was previously conditioned on the respondent's status)
	* NOTE(review): unlike empstatR, code 7 of empstatHH is not set to
	* missing, so it counts as 0 in the four dummies below -- confirm intended.
	tab empstatHH
	gen employedHH= empstatHH==1 if empstatHH<8
	gen retiredHH = empstatHH==3 | empstatHH==4 if empstatHH<8 //includes permanently disabled
	gen laborforceHH = empstatHH==1 | empstatHH==2 if empstatHH<8 
	gen hwstdHH = empstatHH==5 | empstatHH==6 if empstatHH<8

* Clean occR (901 for housewives and students)
replace occR = . if occR == 999

** Coarsened variables (following Gallup numbering)
* The detailed-code ranges below do not overlap; codes outside every range
* leave the coarse variable missing
foreach occ of varlist occR occHH {
	gen coarse_`occ' = .
	replace coarse_`occ' = 0 if inrange(`occ', 710, 712) | inrange(`occ', 810, 850) //farm laborer and operators
	replace coarse_`occ' = 1 if inrange(`occ', 101, 160) //professional
	replace coarse_`occ' = 2 if inrange(`occ', 201, 275) //managers
	replace coarse_`occ' = 3 if inrange(`occ', 301, 328) //clerical
	replace coarse_`occ' = 4 if inrange(`occ', 401, 473) //skilled
	replace coarse_`occ' = 5 if inrange(`occ', 501, 560) //semi-skilled (operatives etc)
	replace coarse_`occ' = 6 if inrange(`occ', 720, 730) //non-farm laborers
	replace coarse_`occ' = 7 if inrange(`occ', 610, 681) //service
	replace coarse_`occ' = 8 if inrange(`occ', 350, 390) //sales
}

* Coarsen father occ
	* Bring in the crosswalked code (fatheroccej) by raw father occupation.
	* Unmatched respondents must have a missing fatherocc; crosswalk rows with
	* no respondent are discarded.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1968_ANES.dta"
	assert _merge != 1 | fatherocc == .
	keep if _merge != 2
	drop _merge

* Coarsen household head occ and respondent
* The same crosswalk is reused: a renamed copy is written to a temporary file
* so it can be merged on occHH (then occR), adding occHH_ej (then occR_ej).
foreach who in HH R {
	preserve
		use "../Crosswalks/Crosswalk_1968_ANES.dta", clear
		rename fatherocc occ`who'
		rename fatheroccej occ`who'_ej

		tempfile xwalk
		save `xwalk'
	restore

	* Codes 98 and 19 are set to missing before matching
	replace occ`who' = . if inlist(occ`who', 98, 19)
	merge m:1 occ`who' using `xwalk'
	assert _merge != 1 | occ`who' == .
	keep if _merge != 2
	drop _merge
	}

/* This year does not allow dummies for who headed the household (dad, mom,
   other male, other female) while R was growing up. headofhh_father_imputed
   is built instead: any observation with a non-missing crosswalked father
   occupation is assumed to have had the father as head of household.
*/
gen headofhh_father_imputed = cond(fatheroccej != ., 1, .)

** DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING)
* 1 for the listed raw codes; 0 when a crosswalked occupation in 1-81 exists
* and the raw code is not one of those; missing otherwise
gen father_notworking = 1 if inlist(fatherocc, 910, 911, 920, 950, 960)
replace father_notworking = 0 if father_notworking == . & inrange(fatheroccej, 1, 81)
tab father_notworking, missing

//mother_notworking--not available. 

*Construct consistent, continuous edu var
* The educationR codes tested below are mutually exclusive, so each line can
* assign directly; codes not listed leave yrsschool missing
gen yrsschool=.
replace yrsschool = 0 if educationR==0
replace yrsschool = 1 if educationR==11
replace yrsschool = 2 if educationR==12
replace yrsschool = 3 if educationR==13
replace yrsschool = 4 if educationR==14
replace yrsschool = 5 if educationR==15
replace yrsschool = 6 if educationR==16
replace yrsschool = 7 if inlist(educationR,17,18)
replace yrsschool = 8 if inlist(educationR,21,22)
replace yrsschool = 9 if inlist(educationR,31,41)
replace yrsschool = 10 if inlist(educationR,32,42)
replace yrsschool = 11 if inlist(educationR,33,43)
replace yrsschool = 12 if inlist(educationR,50,51,61)
replace yrsschool = 14 if educationR==71
replace yrsschool = 16 if educationR==81
replace yrsschool = 17 if inrange(educationR,82,86)
replace yrsschool =. if inrange(educationR,98,99) /*inap, DK, NA*/
tab yrsschool, m

** Construct consistent education variable
* Every condition names educationR in full; the grade-school line used to rely
* on Stata's variable-name abbreviation ("education"), which stops working
* under -set varabbrev off- or if another education* variable is ever added.
* NOTE(review): code 50 counts as 12 years in yrsschool but is left missing
* here -- confirm whether it should map to 4 (HS degree).
gen eduR=.
replace eduR=0 if educationR==0
replace eduR=1 if educationR>=11 & educationR<=18 //less than grade school
replace eduR=2 if educationR==21 | educationR==22 //grade school
replace eduR=3 if (educationR>=31 & educationR<=33) | (educationR>=41 & educationR<=43) //some high school
replace eduR=4 if educationR==51  //HS degree
replace eduR=5 if educationR==61 | educationR==71 //some college/ hs + technical training
replace eduR=6 if educationR>=81 & educationR<87 //college degree
tab eduR,m 


** Family income consistent over time
* Each raw bracket code is replaced by a dollar amount; the i-th code in
* raw_codes receives the i-th amount in dollar_values. Codes not listed stay
* missing. 750 equals 0.75*1000 and 31250 equals 1.25*25000.
gen fam_inc = .
local raw_codes     10  11   12   13   14   15   20   21   22   23   30    31    32    33    35
local dollar_values 750 1500 2500 3500 4500 5500 6500 7500 8500 9500 11000 13500 17500 22500 31250
local npairs : word count `raw_codes'
forvalues i = 1/`npairs' {
	local code   : word `i' of `raw_codes'
	local amount : word `i' of `dollar_values'
	replace fam_inc = `amount' if fam_inc_raw == `code'
}

*Similar measure but with 10 bins so it's more consistent with years around it
* The five lowest brackets match fam_inc; the higher brackets are pooled
gen fam_inc_10bins = .
replace fam_inc_10bins = 750   if fam_inc_raw == 10
replace fam_inc_10bins = 1500  if fam_inc_raw == 11
replace fam_inc_10bins = 2500  if fam_inc_raw == 12
replace fam_inc_10bins = 3500  if fam_inc_raw == 13
replace fam_inc_10bins = 4500  if fam_inc_raw == 14
replace fam_inc_10bins = 6000  if inlist(fam_inc_raw, 15, 20)
replace fam_inc_10bins = 8500  if inlist(fam_inc_raw, 21, 22, 23)
replace fam_inc_10bins = 12500 if inlist(fam_inc_raw, 30, 31)
replace fam_inc_10bins = 20000 if inlist(fam_inc_raw, 32, 33)
replace fam_inc_10bins = 31250 if fam_inc_raw == 35

//note: the suffix "_son" is used to match the variable names in other datasets. All respondents (i.e., male and female) are given a value for these variables. 
* Flags for the lowest and highest amounts; missing when fam_inc is missing
gen bottomcoded_son = (fam_inc == 750)   if fam_inc < .
gen topcoded_son    = (fam_inc == 31250) if fam_inc < .


** Variable for father foreign born
* Starts at 0, becomes missing for the listed fatherorigin/origin codes, and
* is finally set to 1 when any of the three conditions on the last line holds
* (which also overrides a missing set on the line before).
* NOTE(review): the 1970 section applies the 207-508 and 209-... ranges to the
* opposite variables -- confirm which assignment is intended.
gen fatherforeign = 0
replace fatherforeign = . if inlist(fatherorigin, 998, 999) | inlist(origin, 8, 9)
replace fatherforeign = 1 if inrange(fatherorigin, 207, 508) | inrange(placeborn, 209, 701) | inlist(origin, 3, 4)

* Variable for respondent being foreign born
gen foreignborn = (placeborn > 200) if placeborn < 999

* Want state where R grew up
* placegrew minus 100 gives the state code; places above 200 (different
* country) and the region-only codes are blanked out
gen stategrow = placegrew - 100
replace stategrow = . if placegrew > 200 ///
	| inlist(placegrew, 109, 118, 119, 129, 138, 158, 159, 178, 179) ///
	| inrange(placegrew, 182, 199)
label var stategrow "State where R grew up"

* Respondent works in a public-sector industry: industry codes 906-939, or
* one of four occupation codes combined with industry 876; missing for
* industry codes 991 and 999
gen publicind = 0
replace publicind = . if inlist(indR, 991, 999)
replace publicind = 1 if inrange(indR, 906, 939) | (inlist(occR, 142, 126, 151, 156) & indR == 876)

* Variable for where respondent raised
* Four categories: 1 farm, 2 small town, 3 small city, 4 large city or suburb
* (raised codes 4 and 6); other codes stay missing
gen cityfarm = raised if inlist(raised, 1, 2, 3)
replace cityfarm = 4 if inlist(raised, 4, 6)

* Three categories: 1 farm, 2 small town, 3 city (6 is suburb)
gen cityfarm2 = raised if inlist(raised, 1, 2)
replace cityfarm2 = 3 if inlist(raised, 3, 4, 6)

* 0 for farm or small town, 1 for raised codes 3 through 6, missing otherwise
gen urban = cond(inlist(raised, 1, 2), 0, cond(inrange(raised, 3, 6), 1, .))

* Display the duplicate count for the combined id (none expected)
duplicates report id_anes

* Discard the raw id and the string helpers used to build id_anes, then park
* the cleaned 1968 data in a temporary file for the final append
drop id stryear *id_temp
sort id_anes

tempfile ANES68
save `ANES68'


********************************************************************************

**********************
*** 1970 
**********************

use "./RawData/NES1970.dta", clear //download data from ANES website 
gen year = 1970

* Exclude the sample of politically ineligible 18-20 year olds that were in the sample in this year.
drop if inlist(V700004, 5, 6, 9)

* Variables retained from the raw file, in ascending variable-number order
keep year ///
	V700002 V700010 V700018 V700034 ///
	V700062 V700066 V700068 V700070 V700071 V700074 V700075 V700076 V700077 ///
	V700106 V700184 V700192 V700193 V700199 V700200 V700249 ///
	V700263 V700264 V700267 V700269 V700271 V700273 V700275 V700276 V700277 ///
	V700284 V700288 V700289 V700298 V700299 V700300 V700304 V700305 ///
	V700347 V700348 ///
	V700363 V700364 V700365 V700366 V700367 V700368 V700369 V700370 ///
	V700374 V700379 V700381 V700388 V700390 V700391 V700392 V700398

* Parents' occupations
rename V700370 fatherocc
rename V700374 motherocc

* Tag the first observation within each occupation code and count the tags,
* i.e. display how many distinct father / mother occupation codes occur
bysort fatherocc: gen nvals = (_n == 1)
count if nvals == 1

bysort motherocc: gen nvals2 = (_n == 1)
count if nvals2 == 1

** Rename the remaining raw variables (ascending variable-number order)

	* Identifier, weight, geography, household composition
	rename V700002 id
	rename V700010 weight
	rename V700018 state
	rename V700034 number_adults

	* Racial-attitude items
	rename V700062 govtact_integschls
	rename V700066 govtact_fpubaccom
	rename V700068 Rop_housinginteg
	rename V700070 hmchange_blackpos
	rename V700071 speed_civrightsmovt
	rename V700074 locblacks_favdeseg
	rename V700075 locwhites_favdeseg
	rename V700076 locwhites_favstrseg
	rename V700077 Rop_degreeraceseg
	rename V700106 govthelp_minorities

	* Party identification and vote
	rename V700184 R_partyid_detailed
	rename V700192 Rfather_partyid
	rename V700193 Rmother_partyid
	rename V700199 vote68
	rename V700200 pres68
	rename V700249 thermometer_blacks

	* Family, education, work
	rename V700263 marital
	rename V700264 R_kids_now
	rename V700267 R_totnumkids_0to17_livinginhh
	rename V700269 educationR
	rename V700275 empstatR
	rename V700276 occR
	rename V700277 indR
	rename V700298 empstatHH
	rename V700299 occHH
	rename V700300 indHH
	rename V700347 union_raw
	rename V700348 union_raw2

	* Origins and upbringing
	rename V700363 placeborn
	rename V700364 origin
	rename V700365 fatherorigin
	rename V700366 motherorigin
	rename V700367 fatherside_origin
	rename V700368 motherside_origin
	rename V700369 nationality
	rename V700379 placegrew
	rename V700381 raised

	* Income and personal characteristics
	rename V700388 fam_inc_raw
	rename V700390 sex
	rename V700391 race
	rename V700392 relate
	rename V700398 ageR
	

/*
    Build a respondent identifier that stays unique once the years are stacked:
    the survey year and the within-year id are converted to text, written side
    by side, and the result is converted back to a number (id_anes).
    The year + id convention follows the ANES documentation
    (https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_cdf_codebook_var.pdf).
*/
tostring year id, generate(stryear strid_temp)
gen id_temp = stryear + strid_temp
destring id_temp, generate(id_anes)

* Variable for marriage
* One dummy per marital code, tabulated right after it is created. The i-th
* name below is paired with the i-th code; every dummy is missing when
* marital is 9 or above.
local status_names married widowed divorced separated never_married
local status_codes 1       5       3        4         2
forvalues i = 1/5 {
	local dummy : word `i' of `status_names'
	local code  : word `i' of `status_codes'
	gen `dummy' = (marital == `code') if marital < 9
	tab `dummy', m
}

* Fix race variable
* Keep a copy of the raw codes first: codes 3 and 4 are folded into code 1
* below, but the Hispanic flag further down is defined from those raw codes.
gen race_raw = race
replace race =1 if inlist(race,3,4)
replace race =. if race>2 
tab race,m //left with white and Black

* Children variable--# of children living in R's hh
replace R_totnumkids_0to17_livinginhh=. if R_totnumkids_0to17_livinginhh==9 

*flag indeterminate (high) # of children
gen flag_sevenplus_kidsunder18 = (R_totnumkids_0to17_livinginhh==7) if R_totnumkids_0to17_livinginhh<.
tab flag_sevenplus_kidsunder18,m 
label var flag_sevenplus_kidsunder18 "Dummy =1 if R has an indeterminate (7+) # of kids living in R's hh"
	
* Construct number in household 
* Kids plus adults; missing when the kid count is missing
gen R_hhsize_plusR = R_totnumkids_0to17_livinginhh + number_adults if R_totnumkids_0to17_livinginhh!=. 
tab R_hhsize_plusR,m 

gen R_hhsize_minusR = R_hhsize_plusR -1
tab R_hhsize_minusR,m 
	
label var R_hhsize_plusR "total # of persons in R's hh (including R)"
label var R_hhsize_minusR "total # of persons in R's hh (NOT including R)"		

* Hispanic variable
* Uses the raw race codes saved above. Testing the recoded race variable here
* could never be true, because race only holds 1, 2 or missing by this point.
* NOTE(review): raw race codes 3 and 4 are presumably the Hispanic-origin
* categories -- confirm against the 1970 codebook.
gen hispanic = inlist(race_raw,3,4)
replace hispanic=1 if inlist(placeborn,182,219,233)
replace hispanic=1 if nationality==219 | fatherorigin==219 | motherorigin==219 | fatherside_origin==219 | motherside_origin==219 //Mexican
drop race_raw

* Age variable
* NOTE(review): the replace below changes nothing (it sets missing to
* missing). The 1968 section recodes 99 to missing at this step; the NA code
* for the 1970 age variable should be confirmed in the codebook and used here.
replace ageR=. if ageR==.
gen agesq = ageR * ageR

* Variable for head of household (missing when relate is 9 or above)
gen head = (relate == 1) if relate < 9

* Variable for spouse: relate code 2 and sex code 2
gen wifeR = (relate == 2 & sex == 2) if relate < 9

** Union variables
* union_raw drives the household indicator and the sample on which all the
* dummies are defined; union_raw2 supplies the member codes
tab union_raw
gen union_hh = (union_raw == 1) if union_raw < 8

* Variable for respondent being in union
* Codes 1, 3, 5 count directly; codes 2 and 6 count only when R is the head
tab union_raw2
gen unionR = inlist(union_raw2, 1, 3, 5) if union_raw < 8
replace unionR = 1 if inlist(union_raw2, 2, 6) & relate == 1

* Variable for anyone besides the respondent being in a union
gen union_other = ((union_raw2 == 2 & relate != 1) | inrange(union_raw2, 3, 7)) if union_raw < 8

* Variable for R and someone else in the household
gen union_Rplus = (inlist(union_raw2, 3, 5) | (union_raw2 == 6 & relate == 1)) if union_raw < 8

* Employment variables
* The same recodes and dummies are built for the respondent (empstatR, no
* suffix on the new variables) and for the household head (empstatHH, suffix
* HH). Code 6 becomes missing and code 7 is folded into 1 before the dummies
* are created; all dummies are missing when the status is 9 or above.
foreach who in R HH {
	local sfx
	if "`who'" == "HH" {
		local sfx HH
	}

	tab empstat`who'
	replace empstat`who' = . if empstat`who' == 6
	replace empstat`who' = 1 if empstat`who' == 7 //counting those on strike as employed
	gen employed`sfx' = (empstat`who' == 1) if empstat`who' < 9
	gen retired`sfx' = (empstat`who' == 3) if empstat`who' < 9 //includes permanently disabled
	gen laborforce`sfx' = (employed`sfx' == 1 | empstat`who' == 2) if empstat`who' < 9
	gen hwstd`sfx' = inlist(empstat`who', 4, 5) if empstat`who' < 9
}
	

* Clean up occR and occHH: code 999 becomes missing
foreach occ of varlist occR occHH {
	replace `occ' = . if `occ' == 999
}

** Coarsened variables (following Gallup numbering)
* The detailed-code ranges below do not overlap; codes outside every range
* leave the coarse variable missing
foreach occ of varlist occR occHH {
	gen coarse_`occ' = .
	replace coarse_`occ' = 0 if inrange(`occ', 710, 712) | inrange(`occ', 810, 850) //farm laborer and operators
	replace coarse_`occ' = 1 if inrange(`occ', 101, 160) //professional
	replace coarse_`occ' = 2 if inrange(`occ', 201, 275) //managers
	replace coarse_`occ' = 3 if inrange(`occ', 301, 328) //clerical
	replace coarse_`occ' = 4 if inrange(`occ', 401, 473) //skilled
	replace coarse_`occ' = 5 if inrange(`occ', 501, 560) //semi-skilled (operatives etc)
	replace coarse_`occ' = 6 if inrange(`occ', 720, 730) //non-farm laborers
	replace coarse_`occ' = 7 if inrange(`occ', 610, 681) //service
	replace coarse_`occ' = 8 if inrange(`occ', 350, 390) //sales
}

* Coarsen father occ
	* Bring in the crosswalked code (fatheroccej) by raw father occupation.
	* Unmatched respondents must have a missing fatherocc; crosswalk rows with
	* no respondent are discarded.
	merge m:1 fatherocc using "../Crosswalks/Crosswalk_1970_ANES.dta"
	assert _merge != 1 | fatherocc == .
	keep if _merge != 2
	drop _merge

* Coarsen household head occ and respondent and mother
* motherocc is temporarily renamed occMother so that all three variables fit
* the occ<suffix> pattern used inside the loop. On each pass a renamed copy of
* the crosswalk is written to a temporary file and merged on occ<suffix>,
* adding occ<suffix>_ej.
rename motherocc occMother
foreach who in HH R Mother {
	preserve
		use "../Crosswalks/Crosswalk_1970_ANES.dta", clear
		rename fatherocc occ`who'
		rename fatheroccej occ`who'_ej

		tempfile xwalk
		save `xwalk'
	restore

	* Codes 98 and 19 are set to missing before matching
	replace occ`who' = . if inlist(occ`who', 98, 19)
	merge m:1 occ`who' using `xwalk'
	assert _merge != 1 | occ`who' == .
	keep if _merge != 2
	drop _merge
	}

* Restore the mother-occupation names used elsewhere in the file
rename occMother_ej motheroccej
rename occMother motherocc

/* This year does not allow dummies for who headed the household (dad, mom,
   other male, other female) while R was growing up. headofhh_father_imputed
   is built instead: 1 when the crosswalked father occupation is non-missing,
   0 when only the crosswalked mother occupation is non-missing, and missing
   when neither is available.
*/
gen headofhh_father_imputed = cond(fatheroccej != ., 1, cond(motheroccej != ., 0, .))
tab headofhh_father_imputed, missing

** DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING)
* 1 for the listed raw codes; 0 when a crosswalked occupation in 1-81 exists
* and the raw code is not one of those; missing otherwise.
* The mother's list additionally contains code 901.
gen father_notworking = 1 if inlist(fatherocc, 910, 911, 920, 950, 960)
replace father_notworking = 0 if father_notworking == . & inrange(fatheroccej, 1, 81)
tab father_notworking, missing

gen mother_notworking = 1 if inlist(motherocc, 901, 910, 911, 920, 950, 960)
replace mother_notworking = 0 if mother_notworking == . & inrange(motheroccej, 1, 81)
tab mother_notworking, missing

*Construct consistent, continuous edu var
* The educationR codes tested below are mutually exclusive, so each line can
* assign directly; codes not listed leave yrsschool missing
gen yrsschool=.
replace yrsschool = 0 if educationR==0
replace yrsschool = 1 if educationR==11
replace yrsschool = 2 if educationR==12
replace yrsschool = 3 if educationR==13
replace yrsschool = 4 if educationR==14
replace yrsschool = 5 if educationR==15
replace yrsschool = 6 if educationR==16
replace yrsschool = 7 if inlist(educationR,17,18)
replace yrsschool = 8 if inlist(educationR,21,22)
replace yrsschool = 9 if inlist(educationR,31,41)
replace yrsschool = 10 if inlist(educationR,32,42)
replace yrsschool = 11 if inlist(educationR,33,43)
replace yrsschool = 12 if inlist(educationR,50,51,61)
replace yrsschool = 14 if educationR==71
replace yrsschool = 16 if educationR==81
replace yrsschool = 17 if inrange(educationR,82,86)
replace yrsschool =. if inrange(educationR,98,99) /*inap, DK, NA*/
tab yrsschool, m

** Construct consistent education variable
* Every condition names educationR in full; the grade-school line used to rely
* on Stata's variable-name abbreviation ("education"), which stops working
* under -set varabbrev off- or if another education* variable is ever added.
* NOTE(review): code 50 counts as 12 years in yrsschool but is left missing
* here -- confirm whether it should map to 4 (HS degree).
gen eduR=.
replace eduR=0 if educationR==0
replace eduR=1 if educationR>=11 & educationR<=18 //less than grade school
replace eduR=2 if educationR==21 | educationR==22 //grade school
replace eduR=3 if (educationR>=31 & educationR<=33) | (educationR>=41 & educationR<=43) //some high school
replace eduR=4 if educationR==51  //HS degree
replace eduR=5 if educationR==61 | educationR==71 //some college/ hs + technical training
replace eduR=6 if educationR>=81 & educationR<87 //college degree
tab eduR,m 


** Family income consistent over time
* Raw bracket codes 1 through 10 are replaced by dollar amounts: the i-th
* entry of dollar_values is assigned to raw code i. Other codes stay missing.
* 750 equals 0.75*1000 and 31250 equals 1.25*25000.
gen fam_inc = .
local dollar_values 750 1500 2500 3500 4500 6250 8750 12500 20000 31250
forvalues code = 1/10 {
	local amount : word `code' of `dollar_values'
	replace fam_inc = `amount' if fam_inc_raw == `code'
}

//note: the suffix "_son" is used to match the variable names in other datasets. All respondents (i.e., male and female) are given a value for these variables. 
* Flags for the lowest and highest amounts; missing when fam_inc is missing
gen bottomcoded_son = (fam_inc == 750)   if fam_inc < .
gen topcoded_son    = (fam_inc == 31250) if fam_inc < .

* Respondent works in a public-sector industry: industry codes 906-939, or
* one of three occupation codes combined with industry 876; set to missing
* for industry codes 0 and 999 before the positive cases are assigned
gen publicind = 0
replace publicind = . if inlist(indR, 0, 999)
replace publicind = 1 if inrange(indR, 906, 939) | (inlist(occR, 142, 126, 156) & indR == 876)

** Variable for father foreign born
* Starts at 0, becomes missing for origin codes 8 and 9, and is finally set
* to 1 when either range condition holds (which also overrides the missing).
* NOTE(review): the 1968 section applies the 207-508 range to fatherorigin
* and a 209-... range to placeborn, the reverse of what is done here --
* confirm which assignment is intended.
gen fatherforeign = 0
replace fatherforeign = . if inlist(origin, 8, 9)
replace fatherforeign = 1 if inrange(fatherorigin, 209, 451) | inrange(placeborn, 207, 508)

* Variable for respondent being foreign born
gen foreignborn = (placeborn > 200) if placeborn < 999

* Want state where R grew up
* placegrew minus 100 gives the state code; places above 200 (different
* country) and the region-only codes are blanked out
gen stategrow = placegrew - 100
replace stategrow = . if placegrew > 200 ///
	| inlist(placegrew, 109, 118, 119, 129, 138, 158, 159, 178, 179) ///
	| inrange(placegrew, 182, 199)
label var stategrow "State where R grew up"

* Variable for where respondent raised
* Four categories: 1 farm, 2 small town, 3 small city, 4 large city or suburb
* (raised codes 4 and 6); other codes stay missing
gen cityfarm = raised if inlist(raised, 1, 2, 3)
replace cityfarm = 4 if inlist(raised, 4, 6)

* Three categories: 1 farm, 2 small town, 3 city (6 is suburb)
gen cityfarm2 = raised if inlist(raised, 1, 2)
replace cityfarm2 = 3 if inlist(raised, 3, 4, 6)

* 0 for farm or small town, 1 for raised codes 3 through 6, missing otherwise
gen urban = cond(inlist(raised, 1, 2), 0, cond(inrange(raised, 3, 6), 1, .))

* Display the duplicate count for the combined id (none expected)
duplicates report id_anes

* Discard the raw id and the string helpers used to build id_anes, then store
* the cleaned 1970 data in a temporary file (it also stays in memory)
drop id stryear *id_temp
sort id_anes

tempfile ANES70
save `ANES70'

********************************************************************************
********************************************************************************

* Stack the earlier years underneath the 1970 data currently in memory,
* newest first. `ANES`yr'' resolves the inner macro first, giving the
* tempfile macros ANES68, ANES66, ... created in the per-year sections.
foreach yr in 68 66 64 60 58 56 {
	append using `ANES`yr''
}

* Label new variables

	* Identifiers and survey design
	label variable id_anes "R ID (unique identifier)"
	label variable year "Year"
	label variable weight "Weight"

	* Respondent demographics
	label variable agesq "Age squared"
	label variable wifeR "Respondent is wife of head"
	label variable married "Respondent is married"
	label variable never_married "Respondent has never been married"
	label variable widowed "Respondent is widowed"
	label variable divorced "Respondent is divorced"
	label variable separated "Respondent is separated"
	label variable R_totnumkids_0to17_livinginhh "Total # of kids (0-17) living in R's hh (max. 7)"
	label variable eduR "Respondent's education, consistent"
	label variable fam_inc "Family income (midpoint of interval)"

	* Respondent employment
	label variable employed "Respondent is employed"
	label variable laborforce "Respondent is in labor force (employed or unemployed)"
	label variable retired "Respondent is retired or permanently disabled"
	label variable hwstd "Respondent is housewife or student"
	label variable publicind "Respondent is in public-sector industry"

	* Household-head employment
	label variable employedHH "Household head is employed"
	label variable laborforceHH "Household head is in labor force"
	label variable retiredHH "Household head is retired"
	label variable hwstdHH "Household head is housewife or student"
	label variable publicindHH "HH is in public-sector industry"

	* Occupation
	label variable coarse_occR "Respondent occ, coarse"
	label variable coarse_occHH "Head of household occ, coarse"
	label variable occHH_ej "HH occupation, crosswalk"

	* Union membership
	label variable unionR "Respondent is in union"
	label variable union_hh "Union household" 
	label variable union_Rplus "Respondent and someone else in union"
	label variable union_other "1 if anyone besides R is in union"

	* Family background and upbringing
	label variable fatheroccej "Father's occ while R grew up, crosswalk"
	label variable motheroccej "Mother's occ while R grew up, crosswalk"
	label variable headofhh_father_imputed "Impute dad when parent occ!=missing & no info about hh head when R was kid"
	label variable fatherforeign "Father of respondent is foreign"
	label variable cityfarm "Where R was raised, 4 categories"
	label variable cityfarm2 "Where R was raised, 3 categories"
	label variable urban "Respondent grew up in city"

* Label variable values

* Remove the value-label definition named V700267_ (presumably the one that
* came attached to the raw 1970 kids-in-household variable -- confirm)
label drop V700267_ 

* Urban upbringing
label define city 1 "City or suburb" 0 "Farm or small town"
label values urban city

* Education categories
label define edulabel3 0 "None" 1 "Some grade school or less" 2 "Grade school" ///
	3 "Some HS" 4 "HS degree" 5 "Some college" 6 "College degree or more"
label values eduR edulabel3

* Coarse occupation groups, shared by respondent and household head
label define occs 0 "Farm, farm laborer" 1 "Professional" 2 "Businessmen" ///
	3 "Clerical" 4 "Skilled labor" 5 "Semi-skilled labor" ///
	6 "Unskilled labor" 7 "Service worker" 8 "Sales"
foreach occvar of varlist coarse_occR coarse_occHH {
	label values `occvar' occs
}

** Label states
* One value label shared by the state of interview and the state where R grew
* up. Entries are written one code block per line.
label define statecode ///
	1 "Connecticut" 2 "Maine" 3 "Massachusetts" 4 "New Hampshire" 5 "Rhode Island" 6 "Vermont" ///
	11 "Delaware" 12 "New Jersey" 13 "New York" 14 "Pennsylvania" ///
	21 "Illinois" 22 "Indiana" 23 "Michigan" 24 "Ohio" 25 "Wisconsin" ///
	31 "Iowa" 32 "Kansas" 33 "Minnesota" 34 "Missouri" 35 "Nebraska" 36 "North Dakota" 37 "South Dakota" ///
	40 "Virginia" 41 "Alabama" 42 "Arkansas" 43 "Florida" 44 "Georgia" 45 "Louisiana" ///
	46 "Mississippi" 47 "North Carolina" 48 "South Carolina" 49 "Texas" ///
	51 "Kentucky" 52 "Maryland" 53 "Oklahoma" 54 "Tennessee" 55 "DC" 56 "West Virginia" ///
	61 "Arizona" 62 "Colorado" 63 "Idaho" 64 "Montana" 65 "Nevada" 66 "New Mexico" 67 "Utah" 68 "Wyoming" ///
	71 "California" 72 "Oregon" 73 "Washington" ///
	80 "Alaska" 81 "Hawaii" 82 "Puerto Rico"
foreach statevar of varlist stategrow state {
	label values `statevar' statecode
}

* Region variable
* Built from the state-of-interview code; the code sets below do not overlap,
* and states outside all of them are left with a missing region
gen region = .
replace region = 1 if inrange(state, 1, 6)                            //new england
replace region = 2 if inrange(state, 11, 14) | inlist(state, 52, 55)  //mid-atlantic + DC + MD
replace region = 3 if inrange(state, 21, 25) | inlist(state, 51, 56)  //east north central + WV + KY
replace region = 4 if inrange(state, 31, 37)                          //west north central
replace region = 5 if inrange(state, 40, 49) | inlist(state, 53, 54)  //solid south + TN + OK
replace region = 6 if inrange(state, 61, 69)                          //mountain states
replace region = 7 if inrange(state, 71, 73)                          //pacific

label define regionvals 1 "New England" 2 "Mid-Atlantic" 3 "East North Central" ///
	4 "West North Central" 5 "South" 6 "Mountain" 7 "Pacific"
label values region regionvals

* Drop unwanted variables: every variable whose name still begins with a
* capital V (raw variables that were kept but never renamed)
drop V*

* Shrink storage types, put the key variables first, sort by respondent id,
* and write the combined file to the output folder under the working directory
compress
order id_anes weight year
sort id_anes
save "./output/ANES_56to70cleaner.dta", replace
